41 0x0200020002000200LL,};
45 0x0004000400040004LL,};
59 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
60 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
61 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
63 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
64 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
65 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
67 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
68 DECLARE_ALIGNED(8,
const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
78 {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
79 {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
87 #define COMPILE_TEMPLATE_MMX2 0
88 #define RENAME(a) a ## _MMX
95 #undef COMPILE_TEMPLATE_MMX2
96 #define COMPILE_TEMPLATE_MMX2 1
97 #define RENAME(a) a ## _MMX2
121 const int firstLumSrcY= vLumFilterPos[
dstY];
122 const int firstChrSrcY= vChrFilterPos[chrDstY];
130 if (dstY < dstH - 2) {
131 const int16_t **lumSrcPtr= (
const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
132 const int16_t **chrUSrcPtr= (
const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
136 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->
srcH) {
137 const int16_t **tmpY = (
const int16_t **) lumPixBuf + 2 * vLumBufSize;
138 int neg = -firstLumSrcY, i, end =
FFMIN(c->
srcH - firstLumSrcY, vLumFilterSize);
139 for (i = 0; i < neg; i++)
140 tmpY[i] = lumSrcPtr[neg];
141 for ( ; i < end; i++)
142 tmpY[i] = lumSrcPtr[i];
148 const int16_t **tmpA = (
const int16_t **) alpPixBuf + 2 * vLumBufSize;
149 for (i = 0; i < neg; i++)
150 tmpA[i] = alpSrcPtr[neg];
151 for ( ; i < end; i++)
152 tmpA[i] = alpSrcPtr[i];
154 tmpA[i] = tmpA[i - 1];
158 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->
chrSrcH) {
159 const int16_t **tmpU = (
const int16_t **) chrUPixBuf + 2 * vChrBufSize;
160 int neg = -firstChrSrcY, i, end =
FFMIN(c->
chrSrcH - firstChrSrcY, vChrFilterSize);
161 for (i = 0; i < neg; i++) {
162 tmpU[i] = chrUSrcPtr[neg];
164 for ( ; i < end; i++) {
165 tmpU[i] = chrUSrcPtr[i];
168 tmpU[i] = tmpU[i - 1];
176 *(
const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
177 *(
const void**)&lumMmxFilter[s*i+
APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
179 lumMmxFilter[s*i+
APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
180 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
182 *(
const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
183 *(
const void**)&alpMmxFilter[s*i+
APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
189 *(
const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
190 *(
const void**)&chrMmxFilter[s*i+
APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
192 chrMmxFilter[s*i+
APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
193 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
197 *(
const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
200 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
202 *(
const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
204 alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
208 *(
const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
211 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
/*
 * Declare (extern) one horizontal scaler routine. The declared symbol is
 * named ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt> and takes the
 * SwsContext, a destination int16_t buffer, the destination width, the
 * source bytes, and the filter coefficients / positions / size.
 * The expansion carries no trailing semicolon; the user supplies it.
 * NOTE(review): the routines themselves are defined outside this file
 * (presumably in assembly) -- confirm.
 */
217 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
218 extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
219 SwsContext *c, int16_t *data, \
220 int dstW, const uint8_t *src, \
221 const int16_t *filter, \
222 const int32_t *filterPos, int filterSize)
/*
 * Declare the full set of horizontal scalers for one filter size and one
 * instruction-set suffix: every combination of from_bpc in {8, 9, 10, 16}
 * with to_bpc in {15, 19}, i.e. eight SCALE_FUNC declarations.
 * The last declaration is left without a semicolon for the user to add.
 */
224 #define SCALE_FUNCS(filter_n, opt) \
225 SCALE_FUNC(filter_n, 8, 15, opt); \
226 SCALE_FUNC(filter_n, 9, 15, opt); \
227 SCALE_FUNC(filter_n, 10, 15, opt); \
228 SCALE_FUNC(filter_n, 16, 15, opt); \
229 SCALE_FUNC(filter_n, 8, 19, opt); \
230 SCALE_FUNC(filter_n, 9, 19, opt); \
231 SCALE_FUNC(filter_n, 10, 19, opt); \
232 SCALE_FUNC(filter_n, 16, 19, opt)
234 #define SCALE_FUNCS_MMX(opt) \
235 SCALE_FUNCS(4, opt); \
236 SCALE_FUNCS(8, opt); \
239 #define SCALE_FUNCS_SSE(opt) \
240 SCALE_FUNCS(4, opt); \
241 SCALE_FUNCS(8, opt); \
242 SCALE_FUNCS(X4, opt); \
/*
 * Declare (extern) one multi-tap vertical scaler named
 * ff_yuv2planeX_<size>_<opt>. It receives the filter coefficients and
 * their count, an array of source line pointers, the destination buffer
 * and width, plus a dither table and an offset.
 */
252 #define VSCALEX_FUNC(size, opt) \
253 extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
254 const int16_t **src, uint8_t *dest, int dstW, \
255 const uint8_t *dither, int offset)
/* Declare the ff_yuv2planeX_* variants for sizes 8, 9 and 10 with suffix opt. */
256 #define VSCALEX_FUNCS(opt) \
257 VSCALEX_FUNC(8, opt); \
258 VSCALEX_FUNC(9, opt); \
259 VSCALEX_FUNC(10, opt)
/*
 * Declare (extern) one single-line vertical output routine named
 * ff_yuv2plane1_<size>_<opt>, taking one source line, the destination
 * buffer and width, a dither table and an offset.
 */
269 #define VSCALE_FUNC(size, opt) \
270 extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
271 const uint8_t *dither, int offset)
/*
 * Declare the ff_yuv2plane1_* variants for sizes 8, 9, 10 and 16.
 * Sizes 8 and 16 use suffix opt1 while sizes 9 and 10 use suffix opt2,
 * so the two groups may be bound to different instruction sets.
 */
272 #define VSCALE_FUNCS(opt1, opt2) \
273 VSCALE_FUNC(8, opt1); \
274 VSCALE_FUNC(9, opt2); \
275 VSCALE_FUNC(10, opt2); \
276 VSCALE_FUNC(16, opt1)
/*
 * Declare (extern) a chroma input converter named ff_<fmt>ToUV_<opt>.
 * It writes to separate dstU / dstV buffers from a single src buffer;
 * the second source pointer and the trailing uint32_t pointer are
 * named "unused" in the prototype.
 */
285 #define INPUT_UV_FUNC(fmt, opt) \
286 extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
287 const uint8_t *src, const uint8_t *unused1, \
288 int w, uint32_t *unused2)
/*
 * Declare both input converters for a format: the luma routine
 * ff_<fmt>ToY_<opt> and, via INPUT_UV_FUNC, the matching chroma routine.
 */
289 #define INPUT_FUNC(fmt, opt) \
290 extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
291 int w, uint32_t *unused); \
292 INPUT_UV_FUNC(fmt, opt)
/*
 * Declare the input converters for suffix opt: uyvy and yuyv get both a
 * ToY and a ToUV routine, whereas nv12 and nv21 only get a ToUV routine.
 */
293 #define INPUT_FUNCS(opt) \
294 INPUT_FUNC(uyvy, opt); \
295 INPUT_FUNC(yuyv, opt); \
296 INPUT_UV_FUNC(nv12, opt); \
297 INPUT_UV_FUNC(nv21, opt)
310 sws_init_swScale_MMX(c);
313 sws_init_swScale_MMX2(c);
317 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
318 if (c->srcBpc == 8) { \
319 hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
320 ff_hscale8to19_ ## filtersize ## _ ## opt1; \
321 } else if (c->srcBpc == 9) { \
322 hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
323 ff_hscale9to19_ ## filtersize ## _ ## opt1; \
324 } else if (c->srcBpc == 10) { \
325 hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
326 ff_hscale10to19_ ## filtersize ## _ ## opt1; \
328 hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
329 ff_hscale16to19_ ## filtersize ## _ ## opt1; \
332 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
333 switch (filtersize) { \
334 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
335 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
336 default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
338 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case) \
340 case 16: do_16_case; break; \
341 case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
342 case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
343 default: vscalefn = ff_yuv2planeX_8_ ## opt; break; \
345 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
347 case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
348 case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
349 case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
350 default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
353 if (cpu_flags & AV_CPU_FLAG_MMX) {
356 ASSIGN_VSCALE_FUNC(c->
yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
382 if (cpu_flags & AV_CPU_FLAG_MMX2) {
386 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
387 switch (filtersize) { \
388 case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
389 case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
390 default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
391 else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
398 ASSIGN_VSCALE_FUNC(c->
yuv2plane1, sse2, sse2, 1);
438 ASSIGN_VSCALE_FUNC(c->
yuv2plane1, avx, avx, 1);
#define AV_CPU_FLAG_AVX
AVX functions: requires OS support even if YMM registers aren't used.
int16_t ** alpPixBuf
Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
const uint64_t ff_dither8[2]
#define SCALE_FUNCS_MMX(opt)
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
int chrSrcH
Height of source chroma planes.
#define VSCALEX_FUNC(size, opt)
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
#define AV_CPU_FLAG_MMX2
SSE integer functions or AMD MMX ext.
#define DECLARE_ALIGNED(n, t, v)
int dstY
Last destination vertical line output from last slice.
enum PixelFormat srcFormat
Source pixel format.
#define SCALE_FUNCS_SSE(opt)
int srcH
Height of source luma/alpha planes.
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
int vChrFilterSize
Vertical filter size for chroma pixels.
int16_t ** lumPixBuf
Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
external api for the swscale stuff
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
int dstH
Height of destination luma/alpha planes.
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
const uint64_t ff_dither4[2]
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
int vChrBufSize
Number of vertical chroma lines allocated in the ring buffer.
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
int hChrFilterSize
Horizontal filter size for chroma pixels.
void ff_sws_init_swScale_mmx(SwsContext *c)
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
yuv2planar1_fn yuv2plane1
int vLumBufSize
Number of vertical luma/alpha lines allocated in the ring buffer.
int16_t ** chrUPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
as above, but U and V bytes are swapped
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
yuv2planarX_fn yuv2planeX
#define AV_CPU_FLAG_MMX
standard MMX
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
#define VSCALEX_FUNCS(opt)
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
#define VSCALE_FUNCS(opt1, opt2)
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
#define VSCALE_FUNC(size, opt)
enum PixelFormat dstFormat
Destination pixel format.
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) line of output data.
DECLARE_ASM_CONST(DECLARE_ASM_CONST(8, DECLARE_ASM_CONST(uint64_t, bF8)=0