/* Expands to eight comma-separated copies of x. */
51 #define X8(x) x,x,x,x,x,x,x,x
63 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
64 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
65 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
66 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
70 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
71 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
72 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
73 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
77 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
78 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
79 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
80 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
84 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
85 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
86 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
87 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
91 65536, 65536, 65536, 65536,
92 3597, 3597, 3597, 3597,
93 2260, 2260, 2260, 2260,
94 1203, 1203, 1203, 1203,
/*
 * Operand names for the odd rows. Each one is a fixed XMM register:
 * ROW1 = xmm6, ROW3 = xmm4, ROW5 = xmm5, ROW7 = xmm7.
 * NOTE(review): the doubled "%%" is presumably the escape for a literal '%'
 * inside an extended-asm template -- confirm against the asm statement.
 */
100 #define ROW1 "%%xmm6"
101 #define ROW3 "%%xmm4"
102 #define ROW5 "%%xmm5"
103 #define ROW7 "%%xmm7"
/* Emits "pxor r, r", which sets register r to zero. */
105 #define CLEAR_ODD(r) "pxor "r","r" \n\t"
/*
 * Emits a pshufhw from xmm2 into dst with immediate 0x1B: the low four words
 * are copied unchanged and the high four words are written in reverse order.
 */
106 #define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t"
/*
 * Variant in which the even rows are kept in registers too (xmm8..xmm11).
 * NOTE(review): the preprocessor conditional that chooses between this group
 * and the memory-operand definitions of ROW2/ROW4/ROW6 is not visible here.
 * xmm8 and above exist only in 64-bit mode, so this is presumably the x86-64
 * branch -- confirm.
 */
110 # define ROW0 "%%xmm8"
112 # define ROW2 "%%xmm9"
114 # define ROW4 "%%xmm10"
116 # define ROW6 "%%xmm11"
/* Even rows are plain registers here, so they reuse the odd-row helpers. */
118 # define CLEAR_EVEN(r) CLEAR_ODD(r)
119 # define PUT_EVEN(dst) PUT_ODD(dst)
/* Register the column passes use to hold temporary copies. */
120 # define XMMS "%%xmm12"
/*
 * Prefix put in front of the moves that only the memory-operand variant needs.
 * NOTE(review): "#" presumably makes the assembler treat the rest of such a
 * line as a comment -- confirm for the assembler in use.
 */
121 # define MOV_32_ONLY "#"
/* Registers that hold the tan3 and tan1 constants. */
123 # define TAN3 "%%xmm13"
124 # define TAN1 "%%xmm14"
/*
 * Variant in which the even rows are memory operands addressed through
 * operand %0 (n*16(%0)); a row is loaded into its REGn register before use.
 * Only xmm0..xmm7 are named in this group, so registers are shared:
 * REG0 and REG2 are both xmm4, REG4 and REG6 are both xmm6.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible here.
 */
129 # define REG0 "%%xmm4"
130 # define ROW2 "2*16(%0)"
131 # define REG2 "%%xmm4"
132 # define ROW4 "4*16(%0)"
133 # define REG4 "%%xmm6"
134 # define ROW6 "6*16(%0)"
135 # define REG6 "%%xmm6"
/* Expands to nothing in this variant. */
136 # define CLEAR_EVEN(r)
/*
 * Reverses the high four words of xmm2 in place, then stores xmm2 to dst
 * with movdqa.
 */
137 # define PUT_EVEN(dst) \
138 "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \
139 "movdqa %%xmm2, "dst" \n\t"
/*
 * Temporary register for the column passes. It is the same register as TAN1
 * below, which is why those passes store TAN1 to (%0) and reload it on their
 * MOV_32_ONLY lines.
 */
140 # define XMMS "%%xmm2"
/* Here the prefix is a real instruction: MOV_32_ONLY lines become movdqa moves. */
141 # define MOV_32_ONLY "movdqa "
/* Register iLLM_PASS_SPARSE loads row 2 into. */
142 # define SREG2 "%%xmm7"
/* Registers that hold the tan3 and tan1 constants. */
143 # define TAN3 "%%xmm0"
144 # define TAN1 "%%xmm2"
/*
 * Builds the start of a paddd instruction whose source operand is MANGLE(x).
 * The destination is not part of this macro; iMTX_MULT appends ", %%xmm0"
 * after its rounder argument.
 */
148 #define ROUND(x) "paddd "MANGLE(x)
150 #define JZ(reg, to) \
151 "testl "reg","reg" \n\t" \
154 #define JNZ(reg, to) \
155 "testl "reg","reg" \n\t" \
158 #define TEST_ONE_ROW(src, reg, clear) \
160 "movq "src", %%mm1 \n\t" \
161 "por 8+"src", %%mm1 \n\t" \
162 "paddusb %%mm0, %%mm1 \n\t" \
163 "pmovmskb %%mm1, "reg" \n\t"
165 #define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
168 "movq "row1", %%mm1 \n\t" \
169 "por 8+"row1", %%mm1 \n\t" \
170 "movq "row2", %%mm2 \n\t" \
171 "por 8+"row2", %%mm2 \n\t" \
172 "paddusb %%mm0, %%mm1 \n\t" \
173 "paddusb %%mm0, %%mm2 \n\t" \
174 "pmovmskb %%mm1, "reg1" \n\t" \
175 "pmovmskb %%mm2, "reg2" \n\t"
178 #define iMTX_MULT(src, table, rounder, put) \
179 "movdqa "src", %%xmm3 \n\t" \
180 "movdqa %%xmm3, %%xmm0 \n\t" \
181 "pshufd $0x11, %%xmm3, %%xmm1 \n\t" \
182 "punpcklqdq %%xmm0, %%xmm0 \n\t" \
183 "pmaddwd "table", %%xmm0 \n\t" \
184 "pmaddwd 16+"table", %%xmm1 \n\t" \
185 "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" \
186 "punpckhqdq %%xmm3, %%xmm3 \n\t" \
187 "pmaddwd 32+"table", %%xmm2 \n\t" \
188 "pmaddwd 48+"table", %%xmm3 \n\t" \
189 "paddd %%xmm1, %%xmm0 \n\t" \
190 "paddd %%xmm3, %%xmm2 \n\t" \
191 rounder", %%xmm0 \n\t" \
192 "movdqa %%xmm2, %%xmm3 \n\t" \
193 "paddd %%xmm0, %%xmm2 \n\t" \
194 "psubd %%xmm3, %%xmm0 \n\t" \
195 "psrad $11, %%xmm2 \n\t" \
196 "psrad $11, %%xmm0 \n\t" \
197 "packssdw %%xmm0, %%xmm2 \n\t" \
202 "movdqa "MANGLE(tan3)", "TAN3" \n\t" \
203 "movdqa "MANGLE(tan1)", "TAN1" \n\t" \
/*
 * IDCT pass on columns.
 *
 * dct is the base-address operand text: the eight result rows are stored
 * with movdqa to n*16(dct), n = 0..7, each after an arithmetic right shift
 * by 6.
 * The body reads TAN3 and TAN1 before writing them, so both must already
 * hold their values. The odd rows are read from xmm4..xmm7 (ROW3, ROW5, ROW1,
 * ROW7); the even rows are named through ROW0/ROW2/ROW4/ROW6 and
 * REG0/REG2/REG4/REG6. sqrt2 and tan2 are loaded through MANGLE().
 * On the MOV_32_ONLY lines TAN1 is stored to (%0) and reloaded later; in the
 * variant where TAN1 and XMMS are the same register this keeps TAN1 intact
 * while XMMS is used as a temporary.
 * NOTE(review): the numbering in this listing skips one line right after the
 * #define -- confirm that nothing is missing from the top of the body.
 */
205 #define iLLM_PASS(dct) \
207 "movdqa "TAN3", %%xmm1 \n\t" \
208 "movdqa "TAN1", %%xmm3 \n\t" \
209 "pmulhw %%xmm4, "TAN3" \n\t" \
210 "pmulhw %%xmm5, %%xmm1 \n\t" \
211 "paddsw %%xmm4, "TAN3" \n\t" \
212 "paddsw %%xmm5, %%xmm1 \n\t" \
213 "psubsw %%xmm5, "TAN3" \n\t" \
214 "paddsw %%xmm4, %%xmm1 \n\t" \
215 "pmulhw %%xmm7, %%xmm3 \n\t" \
216 "pmulhw %%xmm6, "TAN1" \n\t" \
217 "paddsw %%xmm6, %%xmm3 \n\t" \
218 "psubsw %%xmm7, "TAN1" \n\t" \
219 "movdqa %%xmm3, %%xmm7 \n\t" \
220 "movdqa "TAN1", %%xmm6 \n\t" \
221 "psubsw %%xmm1, %%xmm3 \n\t" \
222 "psubsw "TAN3", "TAN1" \n\t" \
223 "paddsw %%xmm7, %%xmm1 \n\t" \
224 "paddsw %%xmm6, "TAN3" \n\t" \
225 "movdqa %%xmm3, %%xmm6 \n\t" \
226 "psubsw "TAN3", %%xmm3 \n\t" \
227 "paddsw %%xmm6, "TAN3" \n\t" \
228 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
229 "pmulhw %%xmm4, %%xmm3 \n\t" \
230 "pmulhw %%xmm4, "TAN3" \n\t" \
231 "paddsw "TAN3", "TAN3" \n\t" \
232 "paddsw %%xmm3, %%xmm3 \n\t" \
233 "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \
234 MOV_32_ONLY ROW2", "REG2" \n\t" \
235 MOV_32_ONLY ROW6", "REG6" \n\t" \
236 "movdqa %%xmm7, %%xmm5 \n\t" \
237 "pmulhw "REG6", %%xmm7 \n\t" \
238 "pmulhw "REG2", %%xmm5 \n\t" \
239 "paddsw "REG2", %%xmm7 \n\t" \
240 "psubsw "REG6", %%xmm5 \n\t" \
241 MOV_32_ONLY ROW0", "REG0" \n\t" \
242 MOV_32_ONLY ROW4", "REG4" \n\t" \
243 MOV_32_ONLY" "TAN1", (%0) \n\t" \
244 "movdqa "REG0", "XMMS" \n\t" \
245 "psubsw "REG4", "REG0" \n\t" \
246 "paddsw "XMMS", "REG4" \n\t" \
247 "movdqa "REG4", "XMMS" \n\t" \
248 "psubsw %%xmm7, "REG4" \n\t" \
249 "paddsw "XMMS", %%xmm7 \n\t" \
250 "movdqa "REG0", "XMMS" \n\t" \
251 "psubsw %%xmm5, "REG0" \n\t" \
252 "paddsw "XMMS", %%xmm5 \n\t" \
253 "movdqa %%xmm5, "XMMS" \n\t" \
254 "psubsw "TAN3", %%xmm5 \n\t" \
255 "paddsw "XMMS", "TAN3" \n\t" \
256 "movdqa "REG0", "XMMS" \n\t" \
257 "psubsw %%xmm3, "REG0" \n\t" \
258 "paddsw "XMMS", %%xmm3 \n\t" \
259 MOV_32_ONLY" (%0), "TAN1" \n\t" \
260 "psraw $6, %%xmm5 \n\t" \
261 "psraw $6, "REG0" \n\t" \
262 "psraw $6, "TAN3" \n\t" \
263 "psraw $6, %%xmm3 \n\t" \
264 "movdqa "TAN3", 1*16("dct") \n\t" \
265 "movdqa %%xmm3, 2*16("dct") \n\t" \
266 "movdqa "REG0", 5*16("dct") \n\t" \
267 "movdqa %%xmm5, 6*16("dct") \n\t" \
268 "movdqa %%xmm7, %%xmm0 \n\t" \
269 "movdqa "REG4", %%xmm4 \n\t" \
270 "psubsw %%xmm1, %%xmm7 \n\t" \
271 "psubsw "TAN1", "REG4" \n\t" \
272 "paddsw %%xmm0, %%xmm1 \n\t" \
273 "paddsw %%xmm4, "TAN1" \n\t" \
274 "psraw $6, %%xmm1 \n\t" \
275 "psraw $6, %%xmm7 \n\t" \
276 "psraw $6, "TAN1" \n\t" \
277 "psraw $6, "REG4" \n\t" \
278 "movdqa %%xmm1, ("dct") \n\t" \
279 "movdqa "TAN1", 3*16("dct") \n\t" \
280 "movdqa "REG4", 4*16("dct") \n\t" \
281 "movdqa %%xmm7, 7*16("dct") \n\t"
/*
 * IDCT pass on columns, assuming rows 4-7 are zero.
 *
 * dct is the base-address operand text: all eight result rows are stored
 * with movdqa to n*16(dct), n = 0..7, each after an arithmetic right shift
 * by 6.
 * Only rows 0-3 are read: xmm6 and xmm4 (ROW1 and ROW3) directly, ROW0 via
 * REG0 and ROW2 via SREG2. TAN3 and TAN1 are read before being written, so
 * both must already hold their values; sqrt2 and tan2 are loaded through
 * MANGLE().
 * On the MOV_32_ONLY lines TAN1 is stored to (%0) and reloaded later; in the
 * variant where TAN1 and XMMS are the same register this keeps TAN1 intact
 * while XMMS is used as a temporary.
 */
284 #define iLLM_PASS_SPARSE(dct) \
285 "pmulhw %%xmm4, "TAN3" \n\t" \
286 "paddsw %%xmm4, "TAN3" \n\t" \
287 "movdqa %%xmm6, %%xmm3 \n\t" \
288 "pmulhw %%xmm6, "TAN1" \n\t" \
289 "movdqa %%xmm4, %%xmm1 \n\t" \
290 "psubsw %%xmm1, %%xmm3 \n\t" \
291 "paddsw %%xmm6, %%xmm1 \n\t" \
292 "movdqa "TAN1", %%xmm6 \n\t" \
293 "psubsw "TAN3", "TAN1" \n\t" \
294 "paddsw %%xmm6, "TAN3" \n\t" \
295 "movdqa %%xmm3, %%xmm6 \n\t" \
296 "psubsw "TAN3", %%xmm3 \n\t" \
297 "paddsw %%xmm6, "TAN3" \n\t" \
298 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
299 "pmulhw %%xmm4, %%xmm3 \n\t" \
300 "pmulhw %%xmm4, "TAN3" \n\t" \
301 "paddsw "TAN3", "TAN3" \n\t" \
302 "paddsw %%xmm3, %%xmm3 \n\t" \
303 "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \
304 MOV_32_ONLY ROW2", "SREG2" \n\t" \
305 "pmulhw "SREG2", %%xmm5 \n\t" \
306 MOV_32_ONLY ROW0", "REG0" \n\t" \
307 "movdqa "REG0", %%xmm6 \n\t" \
308 "psubsw "SREG2", %%xmm6 \n\t" \
309 "paddsw "REG0", "SREG2" \n\t" \
310 MOV_32_ONLY" "TAN1", (%0) \n\t" \
311 "movdqa "REG0", "XMMS" \n\t" \
312 "psubsw %%xmm5, "REG0" \n\t" \
313 "paddsw "XMMS", %%xmm5 \n\t" \
314 "movdqa %%xmm5, "XMMS" \n\t" \
315 "psubsw "TAN3", %%xmm5 \n\t" \
316 "paddsw "XMMS", "TAN3" \n\t" \
317 "movdqa "REG0", "XMMS" \n\t" \
318 "psubsw %%xmm3, "REG0" \n\t" \
319 "paddsw "XMMS", %%xmm3 \n\t" \
320 MOV_32_ONLY" (%0), "TAN1" \n\t" \
321 "psraw $6, %%xmm5 \n\t" \
322 "psraw $6, "REG0" \n\t" \
323 "psraw $6, "TAN3" \n\t" \
324 "psraw $6, %%xmm3 \n\t" \
325 "movdqa "TAN3", 1*16("dct") \n\t" \
326 "movdqa %%xmm3, 2*16("dct") \n\t" \
327 "movdqa "REG0", 5*16("dct") \n\t" \
328 "movdqa %%xmm5, 6*16("dct") \n\t" \
329 "movdqa "SREG2", %%xmm0 \n\t" \
330 "movdqa %%xmm6, %%xmm4 \n\t" \
331 "psubsw %%xmm1, "SREG2" \n\t" \
332 "psubsw "TAN1", %%xmm6 \n\t" \
333 "paddsw %%xmm0, %%xmm1 \n\t" \
334 "paddsw %%xmm4, "TAN1" \n\t" \
335 "psraw $6, %%xmm1 \n\t" \
336 "psraw $6, "SREG2" \n\t" \
337 "psraw $6, "TAN1" \n\t" \
338 "psraw $6, %%xmm6 \n\t" \
339 "movdqa %%xmm1, ("dct") \n\t" \
340 "movdqa "TAN1", 3*16("dct") \n\t" \
341 "movdqa %%xmm6, 4*16("dct") \n\t" \
342 "movdqa "SREG2", 7*16("dct") \n\t"
347 "movq "MANGLE(m127)
", %%mm0 \n\t"
384 "%xmm4" ,
"%xmm5" ,
"%xmm6" ,
"%xmm7" ,)
387 "%xmm12",
"%xmm13",
"%xmm14",)
389 "%eax",
"%ecx",
"%edx",
"%esi",
"memory"
DECLARE_ASM_CONST(16, int16_t, tan1)[]
#define TEST_ONE_ROW(src, reg, clear)
#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2)
void ff_idct_xvid_sse2(short *block)
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
#define iLLM_PASS(dct)
IDCT pass on columns.
#define XMM_CLOBBERS(...)
#define iLLM_PASS_SPARSE(dct)
IDCT pass on columns, assuming rows 4-7 are zero.
header for Xvid IDCT functions
#define iMTX_MULT(src, table, rounder, put)
IDCT pass on rows.
void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)