diff --git a/CMakeLists.txt b/CMakeLists.txt index 93c9c56d..6c3376c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE}) ################################################################################ # Options. if(WEBP_ENABLE_SWAP_16BIT_CSP) - add_definitions(-DWEBP_SWAP_16BIT_CSP) + add_definitions(-DWEBP_SWAP_16BIT_CSP=1) endif() ################################################################################ diff --git a/configure.ac b/configure.ac index bb3e3114..9902796b 100644 --- a/configure.ac +++ b/configure.ac @@ -659,7 +659,7 @@ if test "$enable_wic" = "yes"; then fi esac -dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP +dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1 USE_SWAP_16BIT_CSP="" AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified) @@ -667,7 +667,7 @@ AC_ARG_ENABLE([swap-16bit-csp], AS_HELP_STRING([--enable-swap-16bit-csp], [Enable byte swap for 16 bit colorspaces])) if test "$enable_swap_16bit_csp" = "yes"; then - USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP" + USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1" fi AC_MSG_RESULT(${enable_swap_16bit_csp-no}) AC_SUBST(USE_SWAP_16BIT_CSP) diff --git a/makefile.unix b/makefile.unix index fad33e90..223584c6 100644 --- a/makefile.unix +++ b/makefile.unix @@ -57,7 +57,7 @@ endif # EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES # Extra flags to enable byte swap for 16 bit colorspaces. 
-# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP +# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1 # Extra flags to enable multi-threading EXTRA_FLAGS += -DWEBP_USE_THREAD diff --git a/src/dec/io_dec.c b/src/dec/io_dec.c index 8bfab869..83a9701d 100644 --- a/src/dec/io_dec.c +++ b/src/dec/io_dec.c @@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, int num_rows; const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; #else uint8_t* alpha_dst = base_rgba + 1; @@ -431,7 +431,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; #else uint8_t* alpha_dst = base_rgba + 1; @@ -598,9 +598,6 @@ static int CustomSetup(VP8Io* io) { } } - if (is_rgb) { - VP8YUVInit(); - } return 1; } diff --git a/src/dec/tree_dec.c b/src/dec/tree_dec.c index 9e805f60..147b5cf9 100644 --- a/src/dec/tree_dec.c +++ b/src/dec/tree_dec.c @@ -14,12 +14,16 @@ #include "./vp8i_dec.h" #include "../utils/bit_reader_inl_utils.h" +#if !defined(USE_GENERIC_TREE) #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then. 
-#define USE_GENERIC_TREE +#define USE_GENERIC_TREE 1 // ALTERNATE_CODE +#else +#define USE_GENERIC_TREE 0 #endif +#endif // USE_GENERIC_TREE -#ifdef USE_GENERIC_TREE +#if (USE_GENERIC_TREE == 1) static const int8_t kYModesIntra4[18] = { -B_DC_PRED, 1, -B_TM_PRED, 2, @@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br, int x; for (x = 0; x < 4; ++x) { const uint8_t* const prob = kBModesProba[top[x]][ymode]; -#ifdef USE_GENERIC_TREE +#if (USE_GENERIC_TREE == 1) // Generic tree-parsing int i = kYModesIntra4[VP8GetBit(br, prob[0])]; while (i > 0) { @@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br, (!VP8GetBit(br, prob[6]) ? B_LD_PRED : (!VP8GetBit(br, prob[7]) ? B_VL_PRED : (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED))); -#endif // USE_GENERIC_TREE +#endif // USE_GENERIC_TREE top[x] = ymode; } memcpy(modes, top, 4 * sizeof(*top)); diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c index 0f31a66e..a85bde17 100644 --- a/src/dsp/alpha_processing.c +++ b/src/dsp/alpha_processing.c @@ -15,7 +15,10 @@ #include "./dsp.h" // Tables can be faster on some platform but incur some extra binary size (~2k). -// #define USE_TABLES_FOR_ALPHA_MULT +#if !defined(USE_TABLES_FOR_ALPHA_MULT) +#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE +#endif + // ----------------------------------------------------------------------------- @@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) { return v; } -#ifdef USE_TABLES_FOR_ALPHA_MULT +#if (USE_TABLES_FOR_ALPHA_MULT == 1) static const uint32_t kMultTables[2][256] = { { // (255u << MFIX) / alpha @@ -132,7 +135,7 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) { return inverse ? 
(255u << MFIX) / a : a * KINV_255; } -#endif // USE_TABLES_FOR_ALPHA_MULT +#endif // USE_TABLES_FOR_ALPHA_MULT void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) { int x; @@ -277,7 +280,7 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444, static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, int w, int h, int stride) { -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1); #else ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0); diff --git a/src/dsp/dec_clip_tables.c b/src/dsp/dec_clip_tables.c index 74ba34c0..beb051a3 100644 --- a/src/dsp/dec_clip_tables.c +++ b/src/dsp/dec_clip_tables.c @@ -13,9 +13,12 @@ #include "./dsp.h" -#define USE_STATIC_TABLES // undefine to have run-time table initialization +// define to 0 to have run-time table initialization +#if !defined(USE_STATIC_TABLES) +#define USE_STATIC_TABLES 1 // ALTERNATE_CODE +#endif -#ifdef USE_STATIC_TABLES +#if (USE_STATIC_TABLES == 1) static const uint8_t abs0[255 + 255 + 1] = { 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, @@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1]; // and make sure it's set to true _last_ (so as to be thread-safe) static volatile int tables_ok = 0; -#endif +#endif // USE_STATIC_TABLES const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020]; const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112]; @@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255]; const uint8_t* const VP8kabs0 = &abs0[255]; WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) { -#if !defined(USE_STATIC_TABLES) +#if (USE_STATIC_TABLES == 0) int i; if (!tables_ok) { for (i = -255; i <= 255; ++i) { diff --git a/src/dsp/dec_sse2.c b/src/dsp/dec_sse2.c index 8797688a..0704fd04 100644 --- a/src/dsp/dec_sse2.c +++ b/src/dsp/dec_sse2.c @@ -18,7 +18,9 @@ // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C // one it seems => disable it by 
default. Uncomment the following to enable: -// #define USE_TRANSFORM_AC3 +#if !defined(USE_TRANSFORM_AC3) +#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE +#endif #include #include "./common_sse2.h" @@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) { } } -#if defined(USE_TRANSFORM_AC3) +#if (USE_TRANSFORM_AC3 == 1) #define MUL(a, b) (((a) * (b)) >> 16) static void TransformAC3(const int16_t* in, uint8_t* dst) { static const int kC1 = 20091 + (1 << 16); @@ -1182,8 +1184,8 @@ extern void VP8DspInitSSE2(void); WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) { VP8Transform = Transform; -#if defined(USE_TRANSFORM_AC3) - VP8TransformAC3 = TransformAC3; +#if (USE_TRANSFORM_AC3 == 1) + VP8TransformAC3 = TransformAC3; #endif VP8VFilter16 = VFilter16; diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 4cec8a18..73ed55b5 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -134,6 +134,11 @@ extern "C" { #endif #endif +// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) +#if !defined(WEBP_SWAP_16BIT_CSP) +#define WEBP_SWAP_16BIT_CSP 0 +#endif + typedef enum { kSSE2, kSSE3, diff --git a/src/dsp/filters_neon.c b/src/dsp/filters_neon.c index 4d6e50cc..6b9c0668 100644 --- a/src/dsp/filters_neon.c +++ b/src/dsp/filters_neon.c @@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in, // GradientUnfilter_NEON is correct but slower than the C-version, // at least on ARM64. For armv7, it's a wash. // So best is to disable it for now, but keep the idea around... 
-// #define USE_GRADIENT_UNFILTER +#if !defined(USE_GRADIENT_UNFILTER) +#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE +#endif -#if defined(USE_GRADIENT_UNFILTER) +#if (USE_GRADIENT_UNFILTER == 1) #define GRAD_PROCESS_LANE(L) do { \ const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \ const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \ @@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void); WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) { WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON; WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON; -#if defined(USE_GRADIENT_UNFILTER) +#if (USE_GRADIENT_UNFILTER == 1) WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON; #endif diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 20d18f6e..de461e65 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -452,7 +452,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, const uint32_t argb = *src++; const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) *dst++ = ba; *dst++ = rg; #else @@ -469,7 +469,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src, const uint32_t argb = *src++; const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) *dst++ = gb; *dst++ = rg; #else @@ -496,22 +496,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; - -#if !defined(WORDS_BIGENDIAN) -#if !defined(WEBP_REFERENCE_IMPLEMENTATION) WebPUint32ToMem(dst, BSwap32(argb)); -#else // WEBP_REFERENCE_IMPLEMENTATION - dst[0] = (argb >> 24) & 0xff; - dst[1] = (argb >> 16) & 0xff; - dst[2] = (argb >> 8) & 0xff; - dst[3] = (argb 
>> 0) & 0xff; -#endif -#else // WORDS_BIGENDIAN - dst[0] = (argb >> 0) & 0xff; - dst[1] = (argb >> 8) & 0xff; - dst[2] = (argb >> 16) & 0xff; - dst[3] = (argb >> 24) & 0xff; -#endif dst += sizeof(argb); } } else { diff --git a/src/dsp/lossless_mips_dsp_r2.c b/src/dsp/lossless_mips_dsp_r2.c index 2984ce8d..6d7f6e87 100644 --- a/src/dsp/lossless_mips_dsp_r2.c +++ b/src/dsp/lossless_mips_dsp_r2.c @@ -492,7 +492,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src, "ins %[temp3], %[temp5], 16, 4 \n\t" "addiu %[src], %[src], 16 \n\t" "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) "usw %[temp1], 0(%[dst]) \n\t" "usw %[temp3], 4(%[dst]) \n\t" #else @@ -514,7 +514,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src, "ins %[temp0], %[temp5], 16, 4 \n\t" "addiu %[src], %[src], 4 \n\t" "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t" -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) "ush %[temp0], 0(%[dst]) \n\t" #else "wsbh %[temp0], %[temp0] \n\t" @@ -570,7 +570,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src, "ins %[temp2], %[temp3], 0, 5 \n\t" "addiu %[src], %[src], 16 \n\t" "append %[temp2], %[temp1], 16 \n\t" -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) "usw %[temp0], 0(%[dst]) \n\t" "usw %[temp2], 4(%[dst]) \n\t" #else @@ -592,7 +592,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src, "ins %[temp4], %[temp5], 0, 11 \n\t" "addiu %[src], %[src], 4 \n\t" "ins %[temp4], %[temp0], 0, 5 \n\t" -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) "ush %[temp4], 0(%[dst]) \n\t" #else "wsbh %[temp4], %[temp4] \n\t" diff --git a/src/dsp/lossless_sse2.c b/src/dsp/lossless_sse2.c index b03e0ed2..1cd3f134 100644 --- a/src/dsp/lossless_sse2.c +++ b/src/dsp/lossless_sse2.c @@ -570,7 +570,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src, const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f); // g0-|g1-|...|a6-|a7- const __m128i rgba0 = _mm_or_si128(ga2, rb1); // 
rg0..rg7 | ba0..ba7 const __m128i rgba1 = _mm_srli_si128(rgba0, 8); // ba0..ba7 | 0 -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0); // barg0...barg7 #else const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1); // rgba0...rgba7 @@ -611,7 +611,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src, const __m128i rg1 = _mm_or_si128(rb1, g_lo2); // gr0...gr7|xx const __m128i b1 = _mm_srli_epi16(b0, 3); const __m128i gb1 = _mm_or_si128(b1, g_hi2); // bg0...bg7|xx -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1); // rggb0...rggb7 #else const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1); // bgrb0...bgrb7 diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index 541bfd8b..5cd4e748 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -141,7 +141,6 @@ DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb) WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) { WebPInitUpsamplers(); - VP8YUVInit(); #ifdef FANCY_UPSAMPLING return WebPUpsamplers[alpha_is_last ? 
MODE_BGRA : MODE_ARGB]; #else diff --git a/src/dsp/upsampling_mips_dsp_r2.c b/src/dsp/upsampling_mips_dsp_r2.c index ed2eb748..3e69ee0e 100644 --- a/src/dsp/upsampling_mips_dsp_r2.c +++ b/src/dsp/upsampling_mips_dsp_r2.c @@ -19,8 +19,6 @@ #include #include "./yuv.h" -#if !defined(WEBP_YUV_USE_TABLE) - #define YUV_TO_RGB(Y, U, V, R, G, B) do { \ const int t1 = MultHi(Y, 19077); \ const int t2 = MultHi(V, 13320); \ @@ -68,7 +66,7 @@ static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) { { const int rg = (r & 0xf8) | (g >> 5); const int gb = ((g << 3) & 0xe0) | (b >> 3); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) rgb[0] = gb; rgb[1] = rg; #else @@ -84,7 +82,7 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v, { const int rg = (r & 0xf0) | (g >> 4); const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) argb[0] = ba; argb[1] = rg; #else @@ -93,7 +91,6 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v, #endif } } -#endif // WEBP_YUV_USE_TABLE //----------------------------------------------------------------------------- // Alpha handling variants diff --git a/src/dsp/upsampling_msa.c b/src/dsp/upsampling_msa.c index 561e4525..55f4de0c 100644 --- a/src/dsp/upsampling_msa.c +++ b/src/dsp/upsampling_msa.c @@ -274,7 +274,7 @@ static void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) { const int b = Clip8(b1 >> 6); const int rg = (r & 0xf8) | (g >> 5); const int gb = ((g << 3) & 0xe0) | (b >> 3); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) rgb[0] = gb; rgb[1] = rg; #else @@ -293,7 +293,7 @@ static void YuvToRgba4444(int y, int u, int v, uint8_t* const argb) { const int b = Clip8(b1 >> 6); const int rg = (r & 0xf0) | (g >> 4); const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) argb[0] = ba; argb[1] = rg; #else @@ -459,11 +459,11 @@ static void 
YuvToRgba4444Line(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B, RG, BA, tmp0, tmp1; while (length >= 16) { - #ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGBA4444(y, u, v, BA, RG, 16, dst); - #else +#else CALC_RGBA4444(y, u, v, RG, BA, 16, dst); - #endif +#endif y += 16; u += 16; v += 16; @@ -473,7 +473,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u, if (length > 8) { uint8_t temp[2 * 16] = { 0 }; memcpy(temp, y, length * sizeof(*temp)); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGBA4444(temp, u, v, BA, RG, 16, temp); #else CALC_RGBA4444(temp, u, v, RG, BA, 16, temp); @@ -482,7 +482,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u, } else if (length > 0) { uint8_t temp[2 * 8] = { 0 }; memcpy(temp, y, length * sizeof(*temp)); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGBA4444(temp, u, v, BA, RG, 8, temp); #else CALC_RGBA4444(temp, u, v, RG, BA, 8, temp); @@ -495,11 +495,11 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B, RG, GB, tmp0, tmp1; while (length >= 16) { - #ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGB565(y, u, v, GB, RG, 16, dst); - #else +#else CALC_RGB565(y, u, v, RG, GB, 16, dst); - #endif +#endif y += 16; u += 16; v += 16; @@ -509,7 +509,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u, if (length > 8) { uint8_t temp[2 * 16] = { 0 }; memcpy(temp, y, length * sizeof(*temp)); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGB565(temp, u, v, GB, RG, 16, temp); #else CALC_RGB565(temp, u, v, RG, GB, 16, temp); @@ -518,7 +518,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u, } else if (length > 0) { uint8_t temp[2 * 8] = { 0 }; memcpy(temp, y, length * sizeof(*temp)); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) CALC_RGB565(temp, u, 
v, GB, RG, 8, temp); #else CALC_RGB565(temp, u, v, RG, GB, 8, temp); diff --git a/src/dsp/yuv.c b/src/dsp/yuv.c index 43121dc3..310826f8 100644 --- a/src/dsp/yuv.c +++ b/src/dsp/yuv.c @@ -15,59 +15,6 @@ #include -#if defined(WEBP_YUV_USE_TABLE) - -static int done = 0; - -static WEBP_INLINE uint8_t clip(int v, int max_value) { - return v < 0 ? 0 : v > max_value ? max_value : v; -} - -int16_t VP8kVToR[256], VP8kUToB[256]; -int32_t VP8kVToG[256], VP8kUToG[256]; -uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; -uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN]; - -WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) { - int i; - if (done) { - return; - } -#ifndef USE_YUVj - for (i = 0; i < 256; ++i) { - VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX; - VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF; - VP8kVToG[i] = -45773 * (i - 128); - VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX; - } - for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { - const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX; - VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); - VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); - } -#else - for (i = 0; i < 256; ++i) { - VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX; - VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF; - VP8kVToG[i] = -46802 * (i - 128); - VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX; - } - for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { - const int k = i; - VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); - VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); - } -#endif - - done = 1; -} - -#else - -WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {} - -#endif // WEBP_YUV_USE_TABLE - //----------------------------------------------------------------------------- // Plain-C version diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h index eaffff3e..92959bbd 100644 --- a/src/dsp/yuv.h +++ b/src/dsp/yuv.h @@ -38,16 +38,6 @@ #include "./dsp.h" #include "../dec/vp8_dec.h" -#if 
defined(WEBP_EXPERIMENTAL_FEATURES) -// Do NOT activate this feature for real compression. This is only experimental! -// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace. -// This colorspace is close to Rec.601's Y'CbCr model with the notable -// difference of allowing larger range for luma/chroma. -// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its -// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion -// #define USE_YUVj -#endif - //------------------------------------------------------------------------------ // YUV -> RGB conversion @@ -111,7 +101,7 @@ static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v, const int b = VP8YUVToB(y, u); // 5 usable bits const int rg = (r & 0xf8) | (g >> 5); const int gb = ((g << 3) & 0xe0) | (b >> 3); -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) rgb[0] = gb; rgb[1] = rg; #else @@ -127,7 +117,7 @@ static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v, const int b = VP8YUVToB(y, u); // 4 usable bits const int rg = (r & 0xf0) | (g >> 4); const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits -#ifdef WEBP_SWAP_16BIT_CSP +#if (WEBP_SWAP_16BIT_CSP == 1) argb[0] = ba; argb[1] = rg; #else @@ -157,9 +147,6 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, rgba[3] = 0xff; } -// Must be called before everything, to initialize the tables. -void VP8YUVInit(void); - //----------------------------------------------------------------------------- // SSE2 extra functions (mostly for upsampling_sse2.c) @@ -192,8 +179,6 @@ static WEBP_INLINE int VP8ClipUV(int uv, int rounding) { return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 
0 : 255; } -#ifndef USE_YUVj - static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) { const int luma = 16839 * r + 33059 * g + 6420 * b; return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX; // no need to clip @@ -209,28 +194,6 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) { return VP8ClipUV(v, rounding); } -#else - -// This JPEG-YUV colorspace, only for comparison! -// These are also 16bit precision coefficients from Rec.601, but with full -// [0..255] output range. -static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) { - const int luma = 19595 * r + 38470 * g + 7471 * b; - return (luma + rounding) >> YUV_FIX; // no need to clip -} - -static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) { - const int u = -11058 * r - 21710 * g + 32768 * b; - return VP8ClipUV(u, rounding); -} - -static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) { - const int v = 32768 * r - 27439 * g - 5329 * b; - return VP8ClipUV(v, rounding); -} - -#endif // USE_YUVj - #ifdef __cplusplus } // extern "C" #endif diff --git a/src/dsp/yuv_sse2.c b/src/dsp/yuv_sse2.c index f82a2138..ea0bd863 100644 --- a/src/dsp/yuv_sse2.c +++ b/src/dsp/yuv_sse2.c @@ -119,7 +119,7 @@ static WEBP_INLINE void PackAndStore4444(const __m128i* const R, const __m128i* const B, const __m128i* const A, uint8_t* const dst) { -#if !defined(WEBP_SWAP_16BIT_CSP) +#if (WEBP_SWAP_16BIT_CSP == 0) const __m128i rg0 = _mm_packus_epi16(*R, *G); const __m128i ba0 = _mm_packus_epi16(*B, *A); #else @@ -149,7 +149,7 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R, const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3); const __m128i rg = _mm_or_si128(r1, g1); const __m128i gb = _mm_or_si128(g2, b1); -#if !defined(WEBP_SWAP_16BIT_CSP) +#if (WEBP_SWAP_16BIT_CSP == 0) const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb); #else const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg);