21 #ifdef COMPILE_TEMPLATE_SSE
/* SSE build of the helper macros (operates on 16-byte XMM registers).
 * NOTE(review): the definitions of MM/MOV/STEP expected between these
 * lines are missing from this excerpt. */
25 #define MOVQU "movdqu"
/* LOAD(mem,dst): unaligned load from mem, then punpcklbw against MM7
 * widens the low bytes to words — assumes MM7 has been zeroed by the
 * caller (see the "pxor MM7,MM7" in the filter body below). */
27 #define LOAD(mem,dst) \
28 MOV" "mem", "dst" \n\t"\
29 "punpcklbw "MM"7, "dst" \n\t"
/* Whole-register right shift by 1 / 2 bytes (psrldq shifts in bytes). */
30 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
31 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
/* PSHUF(src,dst): src = dst shifted right by two bytes (dst preserved). */
32 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
33 "psrldq $2, "src" \n\t"
/* MMX/MMXEXT build of the same helpers (8-byte MM registers); contracts
 * match the SSE versions above. NOTE(review): the #else/#elif line that
 * should introduce this branch is missing from this excerpt. */
40 #define LOAD(mem,dst) \
41 MOV" "mem", "dst" \n\t"\
42 "punpcklbw "MM"7, "dst" \n\t"
/* psrlq shifts in bits: 8/16 bits == the 1/2-byte shifts of the SSE path. */
43 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
44 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
/* pshufw imm 9 = 0b00001001: result words are src words {1,2,0,0}, i.e.
 * the low lanes move down by one word — the MMX stand-in for psrldq $2. */
45 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
48 #ifdef COMPILE_TEMPLATE_SSSE3
/* PABS(tmp,dst): dst = |dst| for packed signed words. SSSE3 has a native
 * instruction; tmp is unused in this variant. */
49 #define PABS(tmp,dst) \
50 "pabsw "dst", "dst" \n\t"
/* Fallback without SSSE3: |x| = max(x, 0 - x) via a scratch register.
 * NOTE(review): the #else line separating the two variants is missing
 * from this excerpt. */
52 #define PABS(tmp,dst) \
53 "pxor "tmp", "tmp" \n\t"\
54 "psubw "dst", "tmp" \n\t"\
55 "pmaxsw "tmp", "dst" \n\t"
/* CHECK(pj,mj): one probe of the edge-directed interpolation search.
 * Loads cur+mrefs+pj and cur+prefs+mj, leaving their word-widened
 * average in MM5 and a difference score in MM2.
 * NOTE(review): some continuation lines of this macro (between the
 * pmaxub copies and the punpcklbw sequence) are missing from this
 * excerpt — the score computation shown is incomplete. */
58 #define CHECK(pj,mj) \
59 MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur + mrefs + pj */ \
60 MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur + prefs + mj */ \
61 MOVQ" "MM"2, "MM"4 \n\t"\
62 MOVQ" "MM"2, "MM"5 \n\t"\
63 "pxor "MM"3, "MM"4 \n\t"/* differing bits of the two rows */\
64 "pavgb "MM"3, "MM"5 \n\t"/* rounded-up byte average */\
65 "pand "MANGLE(pb_1)", "MM"4 \n\t"/* isolate the rounding bit */\
66 "psubusb "MM"4, "MM"5 \n\t"/* -> truncating (floor) average */\
68 "punpcklbw "MM"7, "MM"5 \n\t" /* widen average to words (MM7 == 0) */ \
69 MOVQ" "MM"2, "MM"4 \n\t"\
70 "psubusb "MM"3, "MM"2 \n\t"\
71 "psubusb "MM"4, "MM"3 \n\t"\
72 "pmaxub "MM"3, "MM"2 \n\t"/* per-byte |row_above - row_below| */\
73 MOVQ" "MM"2, "MM"3 \n\t"\
74 MOVQ" "MM"2, "MM"4 \n\t" \
77 "punpcklbw "MM"7, "MM"2 \n\t"\
78 "punpcklbw "MM"7, "MM"3 \n\t"\
79 "punpcklbw "MM"7, "MM"4 \n\t"\
80 "paddw "MM"3, "MM"2 \n\t"\
81 "paddw "MM"4, "MM"2 \n\t"
/* NOTE(review): the "#define CHECK1" header line is missing from this
 * excerpt; these are its continuation lines. Select-minimum step: keeps
 * the lowest score seen so far in MM0 and the matching interpolation
 * (MM5 from CHECK) in MM1; MM6 saves the compare mask for CHECK2. */
84 MOVQ" "MM"0, "MM"3 \n\t"\
85 "pcmpgtw "MM"2, "MM"3 \n\t" /* mask: previous score > new score */ \
86 "pminsw "MM"2, "MM"0 \n\t" /* MM0 = min(previous, new) score */ \
87 MOVQ" "MM"3, "MM"6 \n\t"\
88 "pand "MM"3, "MM"5 \n\t"\
89 "pandn "MM"1, "MM"3 \n\t"\
90 "por "MM"5, "MM"3 \n\t"/* take MM5 where new won, else keep MM1 */\
91 MOVQ" "MM"3, "MM"1 \n\t"
/* NOTE(review): the "#define CHECK2" header line and its explanatory
 * comment are missing from this excerpt. Same select-minimum pattern as
 * CHECK1, but the new score is first biased through the saved mask in
 * MM6 (paddw pw_1 then psllw $14 turns the 0/-1 mask into a large
 * offset), which changes how ties against the previous best resolve. */
95 "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
96 "psllw $14, "MM"6 \n\t"\
97 "paddsw "MM"6, "MM"2 \n\t"/* saturating add of the bias */\
98 MOVQ" "MM"0, "MM"3 \n\t"\
99 "pcmpgtw "MM"2, "MM"3 \n\t"\
100 "pminsw "MM"2, "MM"0 \n\t"\
101 "pand "MM"3, "MM"5 \n\t"\
102 "pandn "MM"1, "MM"3 \n\t"\
103 "por "MM"5, "MM"3 \n\t"\
104 MOVQ" "MM"3, "MM"1 \n\t"
/* Filter one output line with the yadif deinterlacer (SIMD template;
 * RENAME() stamps out the MMX/SSE/SSSE3 variant selected at compile
 * time).
 *   dst            - destination line
 *   prev/cur/next  - source lines of the previous/current/next frames
 *   w              - width in pixels
 *   prefs/mrefs    - byte offsets to adjacent lines; presumably
 *                    prefs = +stride and mrefs = -stride — TODO confirm
 *   parity         - field parity; mode - yadif mode flags
 * NOTE(review): the opening brace, local declarations and the start of
 * the asm body are missing from this excerpt. */
106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
107 uint8_t *prev, uint8_t *cur, uint8_t *next,
108 int w,
int prefs,
int mrefs,
int parity,
int mode)
/* Per-pixel-group filter loop (STEP pixels per iteration).
 * NOTE(review): this is a heavily gapped excerpt — the asm statement
 * opener, several continuation lines, the CHECK invocations, the jump
 * labels around the mode test and most of the operand lists are missing
 * from this view. The annotations below describe only what is visible. */
117 for(x=0; x<w; x+=STEP){\
119 "pxor "MM"7, "MM"7 \n\t"/* MM7 = 0, required by LOAD's punpcklbw */\
120 LOAD("(%[cur],%[mrefs])", MM"0") /* line above */ \
121 LOAD("(%[cur],%[prefs])", MM"1") /* line below */ \
122 LOAD("(%["prev2"])", MM"2") \
123 LOAD("(%["next2"])", MM"3") \
124 MOVQ" "MM"3, "MM"4 \n\t"\
125 "paddw "MM"2, "MM"3 \n\t"\
126 "psraw $1, "MM"3 \n\t" /* MM3 = (prev2+next2)/2 — temporal pred */ \
127 MOVQ" "MM"0, %[tmp0] \n\t" \
128 MOVQ" "MM"3, %[tmp1] \n\t" \
129 MOVQ" "MM"1, %[tmp2] \n\t" \
130 "psubw "MM"4, "MM"2 \n\t"\
131 PABS( MM"4", MM"2") /* |prev2 - next2| */ \
132 LOAD("(%[prev],%[mrefs])", MM"3") \
133 LOAD("(%[prev],%[prefs])", MM"4") \
134 "psubw "MM"0, "MM"3 \n\t"\
135 "psubw "MM"1, "MM"4 \n\t"\
138 "paddw "MM"4, "MM"3 \n\t" \
139 "psrlw $1, "MM"2 \n\t"\
140 "psrlw $1, "MM"3 \n\t"\
141 "pmaxsw "MM"3, "MM"2 \n\t"/* running max of temporal diffs */\
142 LOAD("(%[next],%[mrefs])", MM"3") \
143 LOAD("(%[next],%[prefs])", MM"4") \
144 "psubw "MM"0, "MM"3 \n\t"\
145 "psubw "MM"1, "MM"4 \n\t"\
148 "paddw "MM"4, "MM"3 \n\t" \
149 "psrlw $1, "MM"3 \n\t"\
150 "pmaxsw "MM"3, "MM"2 \n\t"\
151 MOVQ" "MM"2, %[tmp3] \n\t" /* tmp3 = diff clamp threshold */ \
153 "paddw "MM"0, "MM"1 \n\t"\
154 "paddw "MM"0, "MM"0 \n\t"\
155 "psubw "MM"1, "MM"0 \n\t"/* MM0 = above - below */\
156 "psrlw $1, "MM"1 \n\t" /* MM1 = (above+below)/2 — spatial pred */ \
157 PABS( MM"2", MM"0") \
159 MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" \
160 MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" \
161 MOVQ" "MM"2, "MM"4 \n\t"\
162 "psubusb "MM"3, "MM"2 \n\t"\
163 "psubusb "MM"4, "MM"3 \n\t"\
164 "pmaxub "MM"3, "MM"2 \n\t"/* byte |above - below| at offset -1 */\
165 PSHUF(MM"3", MM"2") \
166 "punpcklbw "MM"7, "MM"2 \n\t" \
167 "punpcklbw "MM"7, "MM"3 \n\t" \
168 "paddw "MM"2, "MM"0 \n\t"\
169 "paddw "MM"3, "MM"0 \n\t"\
170 "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* initial spatial score - 1 */ \
182 MOVQ" %[tmp3], "MM"6 \n\t" \
183 "cmpl $2, %[mode] \n\t"/* branch on mode lost in the excerpt — confirm */\
185 LOAD("(%["prev2"],%[mrefs],2)", MM"2") \
186 LOAD("(%["next2"],%[mrefs],2)", MM"4") \
187 LOAD("(%["prev2"],%[prefs],2)", MM"3") \
188 LOAD("(%["next2"],%[prefs],2)", MM"5") \
189 "paddw "MM"4, "MM"2 \n\t"\
190 "paddw "MM"5, "MM"3 \n\t"\
191 "psrlw $1, "MM"2 \n\t" /* avg of the lines two above */ \
192 "psrlw $1, "MM"3 \n\t" /* avg of the lines two below */ \
193 MOVQ" %[tmp0], "MM"4 \n\t" \
194 MOVQ" %[tmp1], "MM"5 \n\t" \
195 MOVQ" %[tmp2], "MM"7 \n\t" \
196 "psubw "MM"4, "MM"2 \n\t" \
197 "psubw "MM"7, "MM"3 \n\t" \
198 MOVQ" "MM"5, "MM"0 \n\t"\
199 "psubw "MM"4, "MM"5 \n\t" \
200 "psubw "MM"7, "MM"0 \n\t" \
201 MOVQ" "MM"2, "MM"4 \n\t"\
202 "pminsw "MM"3, "MM"2 \n\t"\
203 "pmaxsw "MM"4, "MM"3 \n\t"\
204 "pmaxsw "MM"5, "MM"2 \n\t"\
205 "pminsw "MM"5, "MM"3 \n\t"\
206 "pmaxsw "MM"0, "MM"2 \n\t" /* MM2 = max of the deltas */ \
207 "pminsw "MM"0, "MM"3 \n\t" /* MM3 = min of the deltas */ \
208 "pxor "MM"4, "MM"4 \n\t"\
209 "pmaxsw "MM"3, "MM"6 \n\t"\
210 "psubw "MM"2, "MM"4 \n\t" \
211 "pmaxsw "MM"4, "MM"6 \n\t" /* widen threshold by 2nd-field deltas */ \
214 MOVQ" %[tmp1], "MM"2 \n\t" \
215 MOVQ" "MM"2, "MM"3 \n\t"\
216 "psubw "MM"6, "MM"2 \n\t" /* lower bound: temporal pred - diff */ \
217 "paddw "MM"6, "MM"3 \n\t" /* upper bound: temporal pred + diff */ \
218 "pmaxsw "MM"2, "MM"1 \n\t"\
219 "pminsw "MM"3, "MM"1 \n\t" /* clamp spatial value into bounds */ \
220 "packuswb "MM"1, "MM"1 \n\t"/* words back to unsigned bytes */\
229 [prefs]"r"((x86_reg)prefs),\
230 [mrefs]"r"((x86_reg)mrefs),\
233 __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
/* NOTE(review): expands to nothing, so any "DECLARE_ALIGNED(n, t, v)"
 * use would drop the whole declaration. The usual fallback form is
 * "t v" (with an alignment attribute when available) — this looks
 * truncated; confirm against the original header before relying on it. */
#define DECLARE_ALIGNED(n, t, v)