swscale_mmx.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/intreadwrite.h"
26 #include "libavutil/x86_cpu.h"
27 #include "libavutil/cpu.h"
28 #include "libavutil/pixdesc.h"
29 
/* 8-byte-aligned 64-bit constants declared via DECLARE_ASM_CONST
 * (presumably consumed by the inline-asm template included below - confirm).
 *   bF8 / bFC   : every byte is 0xF8 / 0xFC (upper 5 / upper 6 bits of each byte).
 *   w10 / w02   : every 16-bit word is 0x0010 / 0x0002.
 *   bm00001111  : low four bytes set.
 *   bm00000111  : low three bytes set.
 *   bm11111000  : high five bytes set.
 *   bm01010101  : low byte of every 16-bit word set. */
30 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
31 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
32 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
33 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
34 DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
35 DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
36 DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
37 DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
38 
/* Two-entry dither tables. updateMMXDitherTables() indexes them with the
 * parity of the destination line (dstY & 1, or (dstY + 1) & 1 for red) and
 * stores the selected quadword in the context's blue/green/red dither fields.
 * ff_dither4 holds byte values in the range 0..3. */
39 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
40  0x0103010301030103LL,
41  0x0200020002000200LL,};
42 
/* Same layout as ff_dither4, with byte values in the range 0..6. */
43 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
44  0x0602060206020602LL,
45  0x0004000400040004LL,};
46 
/* Per-16-bit-word field masks, replicated four times per quadword.
 *   *16Mask: bits 0-4, 5-10 and 11-15 (a 5/6/5 split).
 *   *15Mask: bits 0-4, 5-9 and 10-14 (a 5/5/5 split).
 * The b/g/r prefixes presumably name the blue/green/red fields of packed
 * 16- and 15-bit RGB pixels - confirm against the template code. */
47 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
48 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
49 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
50 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
51 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
52 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
53 
/* Byte masks selecting every third byte of a quadword at phase 0, 1 and 2
 * (bytes 0/3/6, 1/4/7 and 2/5 respectively); together they cover all eight
 * bytes. Presumably used for 24-bit packed pixel handling - confirm. */
54 DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
55 DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
56 DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
57 
/* Y/U/V conversion coefficients, packed as 16-bit words in one quadword
 * (top word zero). When FAST_BGR2YV12 is defined the coefficients are small
 * (e.g. 0x0021, 0x0041, 0x000D); otherwise larger-magnitude values are used
 * (e.g. 0x20E5, 0x4083, 0x0C8B), presumably the same weights at a higher
 * fixed-point precision - confirm. Negative weights are stored as 16-bit
 * two's complement (0xFFxx / 0xEDxx ...). */
58 #ifdef FAST_BGR2YV12
59 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
60 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
61 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
62 #else
63 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
64 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
65 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
66 #endif /* FAST_BGR2YV12 */
/* Per-byte offsets: 0x10 (16) in every byte for Y, 0x80 (128) in every byte
 * for U/V. ff_w1111 has every 16-bit word equal to 1. */
67 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
68 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
69 DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
70 
/* Packed 16-bit coefficient words for the 24-bit packed RGB/BGR input
 * converters (presumably consumed by the asm template - confirm).
 * The rgb24 variants hold the same three weights as the bgr24 ones
 * (0x0C88, 0x4087, 0x20DE) with the 0x0C88 and 0x20DE words exchanged. */
71 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
72 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
73 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
74 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
75 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
76 
/* Two rows of four quadwords of chroma coefficients. NOTE(review): the two
 * rows look like the BGR and RGB channel orders respectively - confirm
 * against the code that indexes this table. */
77 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
78  {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
79  {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
80 };
81 
82 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
83 
/* swscale_template.c is included once per instruction-set flavour. Before
 * each inclusion RENAME() is redefined so that every templated symbol gets a
 * distinct suffix (_MMX or _MMX2), and COMPILE_TEMPLATE_MMX2 is set to 0 or 1
 * to select the flavour being compiled. */
84 //MMX versions
85 #if HAVE_MMX
86 #undef RENAME
87 #define COMPILE_TEMPLATE_MMX2 0
88 #define RENAME(a) a ## _MMX
89 #include "swscale_template.c"
90 #endif
91 
92 //MMX2 versions
93 #if HAVE_MMX2
94 #undef RENAME
95 #undef COMPILE_TEMPLATE_MMX2
96 #define COMPILE_TEMPLATE_MMX2 1
97 #define RENAME(a) a ## _MMX2
98 #include "swscale_template.c"
99 #endif
100 
102  int lastInLumBuf, int lastInChrBuf)
103 {
104  const int dstH= c->dstH;
105  const int flags= c->flags;
106  int16_t **lumPixBuf= c->lumPixBuf;
107  int16_t **chrUPixBuf= c->chrUPixBuf;
108  int16_t **alpPixBuf= c->alpPixBuf;
109  const int vLumBufSize= c->vLumBufSize;
110  const int vChrBufSize= c->vChrBufSize;
111  int32_t *vLumFilterPos= c->vLumFilterPos;
112  int32_t *vChrFilterPos= c->vChrFilterPos;
113  int16_t *vLumFilter= c->vLumFilter;
114  int16_t *vChrFilter= c->vChrFilter;
115  int32_t *lumMmxFilter= c->lumMmxFilter;
116  int32_t *chrMmxFilter= c->chrMmxFilter;
117  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
118  const int vLumFilterSize= c->vLumFilterSize;
119  const int vChrFilterSize= c->vChrFilterSize;
120  const int chrDstY= dstY>>c->chrDstVSubSample;
121  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
122  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
123 
124  c->blueDither= ff_dither8[dstY&1];
126  c->greenDither= ff_dither8[dstY&1];
127  else
128  c->greenDither= ff_dither4[dstY&1];
129  c->redDither= ff_dither8[(dstY+1)&1];
130  if (dstY < dstH - 2) {
131  const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
132  const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
133  const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
134  int i;
135 
136  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
137  const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
138  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
139  for (i = 0; i < neg; i++)
140  tmpY[i] = lumSrcPtr[neg];
141  for ( ; i < end; i++)
142  tmpY[i] = lumSrcPtr[i];
143  for ( ; i < vLumFilterSize; i++)
144  tmpY[i] = tmpY[i-1];
145  lumSrcPtr = tmpY;
146 
147  if (alpSrcPtr) {
148  const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
149  for (i = 0; i < neg; i++)
150  tmpA[i] = alpSrcPtr[neg];
151  for ( ; i < end; i++)
152  tmpA[i] = alpSrcPtr[i];
153  for ( ; i < vLumFilterSize; i++)
154  tmpA[i] = tmpA[i - 1];
155  alpSrcPtr = tmpA;
156  }
157  }
158  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
159  const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
160  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
161  for (i = 0; i < neg; i++) {
162  tmpU[i] = chrUSrcPtr[neg];
163  }
164  for ( ; i < end; i++) {
165  tmpU[i] = chrUSrcPtr[i];
166  }
167  for ( ; i < vChrFilterSize; i++) {
168  tmpU[i] = tmpU[i - 1];
169  }
170  chrUSrcPtr = tmpU;
171  }
172 
173  if (flags & SWS_ACCURATE_RND) {
174  int s= APCK_SIZE / 8;
175  for (i=0; i<vLumFilterSize; i+=2) {
176  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
177  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
178  lumMmxFilter[s*i+APCK_COEF/4 ]=
179  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
180  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
181  if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
182  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
183  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
184  alpMmxFilter[s*i+APCK_COEF/4 ]=
185  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
186  }
187  }
188  for (i=0; i<vChrFilterSize; i+=2) {
189  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
190  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
191  chrMmxFilter[s*i+APCK_COEF/4 ]=
192  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
193  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
194  }
195  } else {
196  for (i=0; i<vLumFilterSize; i++) {
197  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
198  lumMmxFilter[4*i+2]=
199  lumMmxFilter[4*i+3]=
200  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
201  if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
202  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
203  alpMmxFilter[4*i+2]=
204  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
205  }
206  }
207  for (i=0; i<vChrFilterSize; i++) {
208  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
209  chrMmxFilter[4*i+2]=
210  chrMmxFilter[4*i+3]=
211  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
212  }
213  }
214  }
215 }
216 
/* Prototype generators for the externally implemented horizontal scalers.
 * SCALE_FUNC declares one function named
 *   ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>.
 * SCALE_FUNCS declares all eight input/output depth combinations
 * (8, 9, 10, 16 -> 15 and -> 19) for one filter-size variant. */
217 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
218 extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
219  SwsContext *c, int16_t *data, \
220  int dstW, const uint8_t *src, \
221  const int16_t *filter, \
222  const int32_t *filterPos, int filterSize)
223 
224 #define SCALE_FUNCS(filter_n, opt) \
225  SCALE_FUNC(filter_n, 8, 15, opt); \
226  SCALE_FUNC(filter_n, 9, 15, opt); \
227  SCALE_FUNC(filter_n, 10, 15, opt); \
228  SCALE_FUNC(filter_n, 16, 15, opt); \
229  SCALE_FUNC(filter_n, 8, 19, opt); \
230  SCALE_FUNC(filter_n, 9, 19, opt); \
231  SCALE_FUNC(filter_n, 10, 19, opt); \
232  SCALE_FUNC(filter_n, 16, 19, opt)
233 
/* MMX flavour: filter-size variants 4, 8 and the generic X. */
234 #define SCALE_FUNCS_MMX(opt) \
235  SCALE_FUNCS(4, opt); \
236  SCALE_FUNCS(8, opt); \
237  SCALE_FUNCS(X, opt)
238 
/* SSE flavours: filter-size variants 4, 8 and two generic ones, X4 and X8. */
239 #define SCALE_FUNCS_SSE(opt) \
240  SCALE_FUNCS(4, opt); \
241  SCALE_FUNCS(8, opt); \
242  SCALE_FUNCS(X4, opt); \
243  SCALE_FUNCS(X8, opt)
244 
/* The mmx prototypes are only declared when ARCH_X86_32 is set. */
245 #if ARCH_X86_32
246 SCALE_FUNCS_MMX(mmx);
247 #endif
248 SCALE_FUNCS_SSE(sse2);
249 SCALE_FUNCS_SSE(ssse3);
250 SCALE_FUNCS_SSE(sse4);
251 
/* Prototype generators for the externally implemented vertical scalers.
 * VSCALEX_FUNC declares ff_yuv2planeX_<size>_<opt>, the multi-tap variant
 * taking a filter and an array of source lines; VSCALEX_FUNCS declares the
 * 8, 9 and 10 bit sizes for one instruction set. */
252 #define VSCALEX_FUNC(size, opt) \
253 extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
254  const int16_t **src, uint8_t *dest, int dstW, \
255  const uint8_t *dither, int offset)
256 #define VSCALEX_FUNCS(opt) \
257  VSCALEX_FUNC(8, opt); \
258  VSCALEX_FUNC(9, opt); \
259  VSCALEX_FUNC(10, opt)
260 
/* mmx2 prototypes only when ARCH_X86_32 is set; a 16-bit variant is declared
 * for sse4 only. */
261 #if ARCH_X86_32
262 VSCALEX_FUNCS(mmx2);
263 #endif
264 VSCALEX_FUNCS(sse2);
265 VSCALEX_FUNCS(sse4);
266 VSCALEX_FUNC(16, sse4);
267 VSCALEX_FUNCS(avx);
268 
/* VSCALE_FUNC declares ff_yuv2plane1_<size>_<opt>, the single-source-line
 * variant. VSCALE_FUNCS takes two instruction-set names: opt1 is used for
 * the 8 and 16 bit sizes, opt2 for the 9 and 10 bit sizes. */
269 #define VSCALE_FUNC(size, opt) \
270 extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
271  const uint8_t *dither, int offset)
272 #define VSCALE_FUNCS(opt1, opt2) \
273  VSCALE_FUNC(8, opt1); \
274  VSCALE_FUNC(9, opt2); \
275  VSCALE_FUNC(10, opt2); \
276  VSCALE_FUNC(16, opt1)
277 
278 #if ARCH_X86_32
279 VSCALE_FUNCS(mmx, mmx2);
280 #endif
281 VSCALE_FUNCS(sse2, sse2);
282 VSCALE_FUNC(16, sse4);
283 VSCALE_FUNCS(avx, avx);
284 
/* Prototype generators for the externally implemented input converters.
 * INPUT_UV_FUNC declares ff_<fmt>ToUV_<opt>; INPUT_FUNC declares
 * ff_<fmt>ToY_<opt> and the matching ToUV function. INPUT_FUNCS declares
 * Y and UV converters for uyvy and yuyv, and UV-only converters for nv12
 * and nv21. */
285 #define INPUT_UV_FUNC(fmt, opt) \
286 extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
287  const uint8_t *src, const uint8_t *unused1, \
288  int w, uint32_t *unused2)
289 #define INPUT_FUNC(fmt, opt) \
290 extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
291  int w, uint32_t *unused); \
292  INPUT_UV_FUNC(fmt, opt)
293 #define INPUT_FUNCS(opt) \
294  INPUT_FUNC(uyvy, opt); \
295  INPUT_FUNC(yuyv, opt); \
296  INPUT_UV_FUNC(nv12, opt); \
297  INPUT_UV_FUNC(nv21, opt)
298 
/* The mmx prototypes are only declared when ARCH_X86_32 is set. */
299 #if ARCH_X86_32
300 INPUT_FUNCS(mmx);
301 #endif
302 INPUT_FUNCS(sse2);
303 INPUT_FUNCS(avx);
304 
306 {
307  int cpu_flags = av_get_cpu_flags();
308 
309  if (cpu_flags & AV_CPU_FLAG_MMX)
310  sws_init_swScale_MMX(c);
311 #if HAVE_MMX2
312  if (cpu_flags & AV_CPU_FLAG_MMX2)
313  sws_init_swScale_MMX2(c);
314 #endif
315 
316 #if HAVE_YASM
317 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
318  if (c->srcBpc == 8) { \
319  hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
320  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
321  } else if (c->srcBpc == 9) { \
322  hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
323  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
324  } else if (c->srcBpc == 10) { \
325  hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
326  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
327  } else /* c->srcBpc == 16 */ { \
328  hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
329  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
330  } \
331 } while (0)
332 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
333  switch (filtersize) { \
334  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
335  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
336  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
337  }
338 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case) \
339 switch(c->dstBpc){ \
340  case 16: do_16_case; break; \
341  case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
342  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
343  default: vscalefn = ff_yuv2planeX_8_ ## opt; break; \
344  }
345 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
346  switch(c->dstBpc){ \
347  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
348  case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
349  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
350  default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
351  }
352 #if ARCH_X86_32
353  if (cpu_flags & AV_CPU_FLAG_MMX) {
354  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
355  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
356  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
357 
358  switch (c->srcFormat) {
359  case PIX_FMT_Y400A:
360  c->lumToYV12 = ff_yuyvToY_mmx;
361  if (c->alpPixBuf)
362  c->alpToYV12 = ff_uyvyToY_mmx;
363  break;
364  case PIX_FMT_YUYV422:
365  c->lumToYV12 = ff_yuyvToY_mmx;
366  c->chrToYV12 = ff_yuyvToUV_mmx;
367  break;
368  case PIX_FMT_UYVY422:
369  c->lumToYV12 = ff_uyvyToY_mmx;
370  c->chrToYV12 = ff_uyvyToUV_mmx;
371  break;
372  case PIX_FMT_NV12:
373  c->chrToYV12 = ff_nv12ToUV_mmx;
374  break;
375  case PIX_FMT_NV21:
376  c->chrToYV12 = ff_nv21ToUV_mmx;
377  break;
378  default:
379  break;
380  }
381  }
382  if (cpu_flags & AV_CPU_FLAG_MMX2) {
383  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2,);
384  }
385 #endif
386 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
387  switch (filtersize) { \
388  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
389  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
390  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
391  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
392  break; \
393  }
394  if (cpu_flags & AV_CPU_FLAG_SSE2) {
395  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
396  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
397  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2,);
398  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
399 
400  switch (c->srcFormat) {
401  case PIX_FMT_Y400A:
402  c->lumToYV12 = ff_yuyvToY_sse2;
403  if (c->alpPixBuf)
404  c->alpToYV12 = ff_uyvyToY_sse2;
405  break;
406  case PIX_FMT_YUYV422:
407  c->lumToYV12 = ff_yuyvToY_sse2;
408  c->chrToYV12 = ff_yuyvToUV_sse2;
409  break;
410  case PIX_FMT_UYVY422:
411  c->lumToYV12 = ff_uyvyToY_sse2;
412  c->chrToYV12 = ff_uyvyToUV_sse2;
413  break;
414  case PIX_FMT_NV12:
415  c->chrToYV12 = ff_nv12ToUV_sse2;
416  break;
417  case PIX_FMT_NV21:
418  c->chrToYV12 = ff_nv21ToUV_sse2;
419  break;
420  }
421  }
422  if (cpu_flags & AV_CPU_FLAG_SSSE3) {
423  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
424  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
425  }
426  if (cpu_flags & AV_CPU_FLAG_SSE4) {
427  /* Xto15 don't need special sse4 functions */
428  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
429  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
430  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
431  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
432  if (c->dstBpc == 16 && !isBE(c->dstFormat))
433  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
434  }
435 
436  if (cpu_flags & AV_CPU_FLAG_AVX) {
437  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,);
438  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
439 
440  switch (c->srcFormat) {
441  case PIX_FMT_YUYV422:
442  c->chrToYV12 = ff_yuyvToUV_avx;
443  break;
444  case PIX_FMT_UYVY422:
445  c->chrToYV12 = ff_uyvyToUV_avx;
446  break;
447  case PIX_FMT_NV12:
448  c->chrToYV12 = ff_nv12ToUV_avx;
449  break;
450  case PIX_FMT_NV21:
451  c->chrToYV12 = ff_nv21ToUV_avx;
452  break;
453  default:
454  break;
455  }
456  }
457 #endif
458 }
#define isBE(x)
#define AV_CPU_FLAG_AVX
AVX functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:40
int16_t ** alpPixBuf
Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
#define PIX_FMT_BGR555
Definition: pixfmt.h:181
const uint64_t ff_dither8[2]
Definition: swscale_mmx.c:43
#define SCALE_FUNCS_MMX(opt)
Definition: swscale_mmx.c:234
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:89
int chrSrcH
Height of source chroma planes.
#define VSCALEX_FUNC(size, opt)
Definition: swscale_mmx.c:252
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
#define AV_CPU_FLAG_MMX2
SSE integer functions or AMD MMX ext.
Definition: cpu.h:28
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:55
uint64_t redDither
int dstY
Last destination vertical line output from last slice.
uint64_t blueDither
enum PixelFormat srcFormat
Source pixel format.
#define SCALE_FUNCS_SSE(opt)
Definition: swscale_mmx.c:239
int srcH
Height of source luma/alpha planes.
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
int vChrFilterSize
Vertical filter size for chroma pixels.
int16_t ** lumPixBuf
Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
#define PIX_FMT_RGB555
Definition: pixfmt.h:177
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
static int flags
Definition: log.c:34
external api for the swscale stuff
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define INPUT_FUNCS(opt)
Definition: swscale_mmx.c:293
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
Definition: cpu.h:36
const uint64_t ff_dither4[2]
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
int vChrBufSize
Number of vertical chroma lines allocated in the ring buffer.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
#define APCK_SIZE
void ff_sws_init_swScale_mmx(SwsContext *c)
Definition: swscale_mmx.c:305
#define FFMIN(a, b)
Definition: common.h:55
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
yuv2planar1_fn yuv2plane1
int vLumBufSize
Number of vertical luma/alpha lines allocated in the ring buffer.
int16_t ** chrUPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
8bit gray, 8bit alpha
Definition: pixfmt.h:137
static int cpu_flags
Definition: dct-test.c:85
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
NULL
Definition: eval.c:50
yuv2planarX_fn yuv2planeX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:27
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:65
#define APCK_COEF
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
Definition: cpu.h:38
#define VSCALEX_FUNCS(opt)
Definition: swscale_mmx.c:256
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:22
#define SWS_ACCURATE_RND
Definition: swscale.h:100
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale_mmx.c:272
uint64_t greenDither
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:31
#define APCK_PTR2
#define VSCALE_FUNC(size, opt)
Definition: swscale_mmx.c:269
enum PixelFormat dstFormat
Destination pixel format.
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) line of output data.
#define av_unused
Definition: attributes.h:95
#define CONFIG_SWSCALE_ALPHA
Definition: config.h:243
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL
Definition: swscale_mmx.c:30