diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index 151a32ee..651274fc 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -153,25 +153,28 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) { // YUV444 converter #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \ -static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ - uint8_t* dst, int len) { \ +extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len); \ +void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ int i; \ for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \ } -YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3) -YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3) -YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4) -YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4) -YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4) -YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2) -YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2) +YUV444_FUNC(WebPYuv444ToRgbC, VP8YuvToRgb, 3) +YUV444_FUNC(WebPYuv444ToBgrC, VP8YuvToBgr, 3) +YUV444_FUNC(WebPYuv444ToRgbaC, VP8YuvToRgba, 4) +YUV444_FUNC(WebPYuv444ToBgraC, VP8YuvToBgra, 4) +YUV444_FUNC(WebPYuv444ToArgbC, VP8YuvToArgb, 4) +YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2) +YUV444_FUNC(WebPYuv444ToRgb565C, VP8YuvToRgb565, 2) #undef YUV444_FUNC WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; extern void WebPInitYUV444ConvertersMIPSdspR2(void); +extern void WebPInitYUV444ConvertersSSE2(void); static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = (VP8CPUInfo)&upsampling_last_cpuinfo_used1; @@ -179,19 +182,24 @@ static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return; - WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb; - WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba; - WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr; - WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra; - WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb; - WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444; - WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565; - WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba; - WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra; - WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb; - WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444; + WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgbC; + WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgbaC; + WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgrC; + WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgraC; + WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgbC; + WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C; + WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565C; + WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgbaC; + WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgraC; + WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgbC; + WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C; if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitYUV444ConvertersSSE2(); + } +#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { WebPInitYUV444ConvertersMIPSdspR2(); diff --git a/src/dsp/upsampling_sse2.c b/src/dsp/upsampling_sse2.c index ce5c1725..b85808e2 100644 --- a/src/dsp/upsampling_sse2.c +++ b/src/dsp/upsampling_sse2.c @@ -201,6 +201,41 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) { #endif // FANCY_UPSAMPLING +//------------------------------------------------------------------------------ + +extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; +extern void WebPInitYUV444ConvertersSSE2(void); + +#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP) \ +extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u, \ + const uint8_t* v, uint8_t* dst, int len); \ +static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ + int i; \ + const int max_len = len & ~31; \ + for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);\ + if (i < len) { /* C-fallback */ \ + WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i); \ + } \ +} + +YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4); +YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4); +YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3); +YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3); + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) { + VP8YUVInitSSE2(); + WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba; + WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra; + WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb; + WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr; +} + +#else + +WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE2) + #endif // WEBP_USE_SSE2 #if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2)) diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h index 8a47edd8..af435a5b 100644 --- a/src/dsp/yuv.h +++ b/src/dsp/yuv.h @@ -249,7 +249,6 @@ void VP8YUVInit(void); // to the binary size. Otherwise, they are initialized at run-time (small cost). #define WEBP_YUV_USE_SSE2_TABLES -#if defined(FANCY_UPSAMPLING) // Process 32 pixels and store the result (24b or 32b per pixel) in *dst. void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst); @@ -259,7 +258,6 @@ void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst); void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst); -#endif // FANCY_UPSAMPLING // Must be called to initialize tables before using the functions. void VP8YUVInitSSE2(void); diff --git a/src/dsp/yuv_sse2.c b/src/dsp/yuv_sse2.c index 47d99e73..2f7a1835 100644 --- a/src/dsp/yuv_sse2.c +++ b/src/dsp/yuv_sse2.c @@ -125,8 +125,6 @@ static WEBP_INLINE void YuvToBgrSSE2(uint8_t y, uint8_t u, uint8_t v, //----------------------------------------------------------------------------- // Convert spans of 32 pixels to various RGB formats for the fancy upsampler. -#ifdef FANCY_UPSAMPLING - void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) { int n; @@ -186,8 +184,6 @@ void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, memcpy(dst + n * 3, tmp, 2 * 3); } -#endif // FANCY_UPSAMPLING - //----------------------------------------------------------------------------- // Arbitrary-length row conversion functions