diff --git a/CMakeLists.txt b/CMakeLists.txt
index 93c9c56d..6c3376c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,7 +35,7 @@ string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE})
 ################################################################################
 # Options.
 if(WEBP_ENABLE_SWAP_16BIT_CSP)
-  add_definitions(-DWEBP_SWAP_16BIT_CSP)
+  add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
 endif()
 
 ################################################################################
diff --git a/configure.ac b/configure.ac
index bb3e3114..9902796b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -659,7 +659,7 @@ if test "$enable_wic" = "yes"; then
 fi
 esac
 
-dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
+dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1
 
 USE_SWAP_16BIT_CSP=""
 AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
@@ -667,7 +667,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
               AS_HELP_STRING([--enable-swap-16bit-csp],
                              [Enable byte swap for 16 bit colorspaces]))
 if test "$enable_swap_16bit_csp" = "yes"; then
-  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
+  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
 fi
 AC_MSG_RESULT(${enable_swap_16bit_csp-no})
 AC_SUBST(USE_SWAP_16BIT_CSP)
diff --git a/makefile.unix b/makefile.unix
index fad33e90..223584c6 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -57,7 +57,7 @@ endif
 # EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES
 
 # Extra flags to enable byte swap for 16 bit colorspaces.
-# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP
+# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1
 
 # Extra flags to enable multi-threading
 EXTRA_FLAGS += -DWEBP_USE_THREAD
diff --git a/src/dec/io_dec.c b/src/dec/io_dec.c
index 8bfab869..83a9701d 100644
--- a/src/dec/io_dec.c
+++ b/src/dec/io_dec.c
@@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     uint8_t* alpha_dst = base_rgba;
 #else
     uint8_t* alpha_dst = base_rgba + 1;
@@ -431,7 +431,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
                                int max_lines_out) {
   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
   uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   uint8_t* alpha_dst = base_rgba;
 #else
   uint8_t* alpha_dst = base_rgba + 1;
@@ -598,9 +598,6 @@ static int CustomSetup(VP8Io* io) {
     }
   }
 
-  if (is_rgb) {
-    VP8YUVInit();
-  }
   return 1;
 }
 
diff --git a/src/dec/tree_dec.c b/src/dec/tree_dec.c
index 9e805f60..147b5cf9 100644
--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@@ -14,12 +14,16 @@
 #include "./vp8i_dec.h"
 #include "../utils/bit_reader_inl_utils.h"
 
+#if !defined(USE_GENERIC_TREE)
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
-#define USE_GENERIC_TREE
+#define USE_GENERIC_TREE 1   // ALTERNATE_CODE
+#else
+#define USE_GENERIC_TREE 0
 #endif
+#endif  // USE_GENERIC_TREE
 
-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
 static const int8_t kYModesIntra4[18] = {
   -B_DC_PRED, 1,
     -B_TM_PRED, 2,
@@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
       int x;
       for (x = 0; x < 4; ++x) {
         const uint8_t* const prob = kBModesProba[top[x]][ymode];
-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
         // Generic tree-parsing
         int i = kYModesIntra4[VP8GetBit(br, prob[0])];
         while (i > 0) {
@@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br,
                         (!VP8GetBit(br, prob[6]) ? B_LD_PRED :
                           (!VP8GetBit(br, prob[7]) ? B_VL_PRED :
                             (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
-#endif    // USE_GENERIC_TREE
+#endif  // USE_GENERIC_TREE
         top[x] = ymode;
       }
       memcpy(modes, top, 4 * sizeof(*top));
diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c
index 0f31a66e..a85bde17 100644
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -15,7 +15,10 @@
 #include "./dsp.h"
 
 // Tables can be faster on some platform but incur some extra binary size (~2k).
-// #define USE_TABLES_FOR_ALPHA_MULT
+#if !defined(USE_TABLES_FOR_ALPHA_MULT)
+#define USE_TABLES_FOR_ALPHA_MULT 0   // ALTERNATE_CODE
+#endif
+
 
 // -----------------------------------------------------------------------------
 
@@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
   return v;
 }
 
-#ifdef USE_TABLES_FOR_ALPHA_MULT
+#if (USE_TABLES_FOR_ALPHA_MULT == 1)
 
 static const uint32_t kMultTables[2][256] = {
   {    // (255u << MFIX) / alpha
@@ -132,7 +135,7 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
   return inverse ? (255u << MFIX) / a : a * KINV_255;
 }
 
-#endif    // USE_TABLES_FOR_ALPHA_MULT
+#endif  // USE_TABLES_FOR_ALPHA_MULT
 
 void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
   int x;
@@ -277,7 +280,7 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
 
 static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
                                      int w, int h, int stride) {
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
 #else
   ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
diff --git a/src/dsp/dec_clip_tables.c b/src/dsp/dec_clip_tables.c
index 74ba34c0..beb051a3 100644
--- a/src/dsp/dec_clip_tables.c
+++ b/src/dsp/dec_clip_tables.c
@@ -13,9 +13,12 @@
 
 #include "./dsp.h"
 
-#define USE_STATIC_TABLES     // undefine to have run-time table initialization
+// define to 0 to have run-time table initialization
+#if !defined(USE_STATIC_TABLES)
+#define USE_STATIC_TABLES 1   // ALTERNATE_CODE
+#endif
 
-#ifdef USE_STATIC_TABLES
+#if (USE_STATIC_TABLES == 1)
 
 static const uint8_t abs0[255 + 255 + 1] = {
   0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
@@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;
 
-#endif
+#endif  // USE_STATIC_TABLES
 
 const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
 const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
 const uint8_t* const VP8kabs0 = &abs0[255];
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
-#if !defined(USE_STATIC_TABLES)
+#if (USE_STATIC_TABLES == 0)
   int i;
   if (!tables_ok) {
     for (i = -255; i <= 255; ++i) {
diff --git a/src/dsp/dec_sse2.c b/src/dsp/dec_sse2.c
index 8797688a..0704fd04 100644
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -18,7 +18,9 @@
 
 // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
-// one it seems => disable it by default. Uncomment the following to enable:
-// #define USE_TRANSFORM_AC3
+// one it seems => disable it by default. Define to 1 to enable:
+#if !defined(USE_TRANSFORM_AC3)
+#define USE_TRANSFORM_AC3 0   // ALTERNATE_CODE
+#endif
 
 #include <emmintrin.h>
 #include "./common_sse2.h"
@@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
   }
 }
 
-#if defined(USE_TRANSFORM_AC3)
+#if (USE_TRANSFORM_AC3 == 1)
 #define MUL(a, b) (((a) * (b)) >> 16)
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
   static const int kC1 = 20091 + (1 << 16);
@@ -1182,8 +1184,8 @@ extern void VP8DspInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
   VP8Transform = Transform;
-#if defined(USE_TRANSFORM_AC3)
+#if (USE_TRANSFORM_AC3 == 1)
   VP8TransformAC3 = TransformAC3;
 #endif
 
   VP8VFilter16 = VFilter16;
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index 4cec8a18..73ed55b5 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -134,6 +134,11 @@ extern "C" {
 #endif
 #endif
 
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
 typedef enum {
   kSSE2,
   kSSE3,
diff --git a/src/dsp/filters_neon.c b/src/dsp/filters_neon.c
index 4d6e50cc..6b9c0668 100644
--- a/src/dsp/filters_neon.c
+++ b/src/dsp/filters_neon.c
@@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
 // GradientUnfilter_NEON is correct but slower than the C-version,
 // at least on ARM64. For armv7, it's a wash.
 // So best is to disable it for now, but keep the idea around...
-// #define USE_GRADIENT_UNFILTER
+#if !defined(USE_GRADIENT_UNFILTER)
+#define USE_GRADIENT_UNFILTER 0   // ALTERNATE_CODE
+#endif
 
-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
 #define GRAD_PROCESS_LANE(L)  do {                                             \
   const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1);  /* rotate predictor in */   \
   const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1));                       \
@@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void);
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
   WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
 #endif
 
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 20d18f6e..de461e65 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -452,7 +452,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
     const uint32_t argb = *src++;
     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     *dst++ = ba;
     *dst++ = rg;
 #else
@@ -469,7 +469,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
     const uint32_t argb = *src++;
     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     *dst++ = gb;
     *dst++ = rg;
 #else
@@ -496,22 +496,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
     const uint32_t* const src_end = src + num_pixels;
     while (src < src_end) {
       const uint32_t argb = *src++;
-
-#if !defined(WORDS_BIGENDIAN)
-#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
       WebPUint32ToMem(dst, BSwap32(argb));
-#else  // WEBP_REFERENCE_IMPLEMENTATION
-      dst[0] = (argb >> 24) & 0xff;
-      dst[1] = (argb >> 16) & 0xff;
-      dst[2] = (argb >>  8) & 0xff;
-      dst[3] = (argb >>  0) & 0xff;
-#endif
-#else  // WORDS_BIGENDIAN
-      dst[0] = (argb >>  0) & 0xff;
-      dst[1] = (argb >>  8) & 0xff;
-      dst[2] = (argb >> 16) & 0xff;
-      dst[3] = (argb >> 24) & 0xff;
-#endif
       dst += sizeof(argb);
     }
   } else {
diff --git a/src/dsp/lossless_mips_dsp_r2.c b/src/dsp/lossless_mips_dsp_r2.c
index 2984ce8d..6d7f6e87 100644
--- a/src/dsp/lossless_mips_dsp_r2.c
+++ b/src/dsp/lossless_mips_dsp_r2.c
@@ -492,7 +492,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     "ins            %[temp3],    %[temp5],          16,   4    \n\t"
     "addiu          %[src],      %[src],            16         \n\t"
     "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "usw            %[temp1],    0(%[dst])                     \n\t"
     "usw            %[temp3],    4(%[dst])                     \n\t"
 #else
@@ -514,7 +514,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     "ins            %[temp0],    %[temp5],          16,   4    \n\t"
     "addiu          %[src],      %[src],            4          \n\t"
     "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "ush            %[temp0],    0(%[dst])                     \n\t"
 #else
     "wsbh           %[temp0],    %[temp0]                      \n\t"
@@ -570,7 +570,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     "ins            %[temp2],    %[temp3],          0,    5    \n\t"
     "addiu          %[src],      %[src],            16         \n\t"
     "append         %[temp2],    %[temp1],          16         \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "usw            %[temp0],    0(%[dst])                     \n\t"
     "usw            %[temp2],    4(%[dst])                     \n\t"
 #else
@@ -592,7 +592,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     "ins            %[temp4],    %[temp5],          0,    11   \n\t"
     "addiu          %[src],      %[src],            4          \n\t"
     "ins            %[temp4],    %[temp0],          0,    5    \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "ush            %[temp4],    0(%[dst])                     \n\t"
 #else
     "wsbh           %[temp4],    %[temp4]                      \n\t"
diff --git a/src/dsp/lossless_sse2.c b/src/dsp/lossless_sse2.c
index b03e0ed2..1cd3f134 100644
--- a/src/dsp/lossless_sse2.c
+++ b/src/dsp/lossless_sse2.c
@@ -570,7 +570,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);  // g0-|g1-|...|a6-|a7-
     const __m128i rgba0 = _mm_or_si128(ga2, rb1);       // rg0..rg7 | ba0..ba7
     const __m128i rgba1 = _mm_srli_si128(rgba0, 8);     // ba0..ba7 | 0
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0);  // barg0...barg7
 #else
     const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1);  // rgba0...rgba7
@@ -611,7 +611,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     const __m128i rg1 = _mm_or_si128(rb1, g_lo2);           // gr0...gr7|xx
     const __m128i b1 = _mm_srli_epi16(b0, 3);
     const __m128i gb1 = _mm_or_si128(b1, g_hi2);            // bg0...bg7|xx
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1);     // rggb0...rggb7
 #else
     const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bgrb0...bgrb7
diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c
index 541bfd8b..5cd4e748 100644
--- a/src/dsp/upsampling.c
+++ b/src/dsp/upsampling.c
@@ -141,7 +141,6 @@ DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
 
 WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
   WebPInitUpsamplers();
-  VP8YUVInit();
 #ifdef FANCY_UPSAMPLING
   return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
 #else
diff --git a/src/dsp/upsampling_mips_dsp_r2.c b/src/dsp/upsampling_mips_dsp_r2.c
index ed2eb748..3e69ee0e 100644
--- a/src/dsp/upsampling_mips_dsp_r2.c
+++ b/src/dsp/upsampling_mips_dsp_r2.c
@@ -19,8 +19,6 @@
 #include <assert.h>
 #include "./yuv.h"
 
-#if !defined(WEBP_YUV_USE_TABLE)
-
 #define YUV_TO_RGB(Y, U, V, R, G, B) do {                                      \
     const int t1 = MultHi(Y, 19077);                                           \
     const int t2 = MultHi(V, 13320);                                           \
@@ -68,7 +66,7 @@ static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
   {
     const int rg = (r & 0xf8) | (g >> 5);
     const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     rgb[0] = gb;
     rgb[1] = rg;
 #else
@@ -84,7 +82,7 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
   {
     const int rg = (r & 0xf0) | (g >> 4);
     const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     argb[0] = ba;
     argb[1] = rg;
 #else
@@ -93,7 +91,6 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
 #endif
    }
 }
-#endif  // WEBP_YUV_USE_TABLE
 
 //-----------------------------------------------------------------------------
 // Alpha handling variants
diff --git a/src/dsp/upsampling_msa.c b/src/dsp/upsampling_msa.c
index 561e4525..55f4de0c 100644
--- a/src/dsp/upsampling_msa.c
+++ b/src/dsp/upsampling_msa.c
@@ -274,7 +274,7 @@ static void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
   const int b = Clip8(b1 >> 6);
   const int rg = (r & 0xf8) | (g >> 5);
   const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   rgb[0] = gb;
   rgb[1] = rg;
 #else
@@ -293,7 +293,7 @@ static void YuvToRgba4444(int y, int u, int v, uint8_t* const argb) {
   const int b = Clip8(b1 >> 6);
   const int rg = (r & 0xf0) | (g >> 4);
   const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   argb[0] = ba;
   argb[1] = rg;
 #else
@@ -459,11 +459,11 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
                               const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B, RG, BA, tmp0, tmp1;
   while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(y, u, v, BA, RG, 16, dst);
-  #else
+#else
     CALC_RGBA4444(y, u, v, RG, BA, 16, dst);
-  #endif
+#endif
     y      += 16;
     u      += 16;
     v      += 16;
@@ -473,7 +473,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
   if (length > 8) {
     uint8_t temp[2 * 16] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(temp, u, v, BA, RG, 16, temp);
 #else
     CALC_RGBA4444(temp, u, v, RG, BA, 16, temp);
@@ -482,7 +482,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
   } else if (length > 0) {
     uint8_t temp[2 * 8] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(temp, u, v, BA, RG, 8, temp);
 #else
     CALC_RGBA4444(temp, u, v, RG, BA, 8, temp);
@@ -495,11 +495,11 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
                             const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B, RG, GB, tmp0, tmp1;
   while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(y, u, v, GB, RG, 16, dst);
-  #else
+#else
     CALC_RGB565(y, u, v, RG, GB, 16, dst);
-  #endif
+#endif
     y      += 16;
     u      += 16;
     v      += 16;
@@ -509,7 +509,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
   if (length > 8) {
     uint8_t temp[2 * 16] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(temp, u, v, GB, RG, 16, temp);
 #else
     CALC_RGB565(temp, u, v, RG, GB, 16, temp);
@@ -518,7 +518,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
   } else if (length > 0) {
     uint8_t temp[2 * 8] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(temp, u, v, GB, RG, 8, temp);
 #else
     CALC_RGB565(temp, u, v, RG, GB, 8, temp);
diff --git a/src/dsp/yuv.c b/src/dsp/yuv.c
index 43121dc3..310826f8 100644
--- a/src/dsp/yuv.c
+++ b/src/dsp/yuv.c
@@ -15,59 +15,6 @@
 
 #include <stdlib.h>
 
-#if defined(WEBP_YUV_USE_TABLE)
-
-static int done = 0;
-
-static WEBP_INLINE uint8_t clip(int v, int max_value) {
-  return v < 0 ? 0 : v > max_value ? max_value : v;
-}
-
-int16_t VP8kVToR[256], VP8kUToB[256];
-int32_t VP8kVToG[256], VP8kUToG[256];
-uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
-uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
-  int i;
-  if (done) {
-    return;
-  }
-#ifndef USE_YUVj
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -45773 * (i - 128);
-    VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#else
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -46802 * (i - 128);
-    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = i;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#endif
-
-  done = 1;
-}
-
-#else
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
-
-#endif  // WEBP_YUV_USE_TABLE
-
 //-----------------------------------------------------------------------------
 // Plain-C version
 
diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h
index eaffff3e..92959bbd 100644
--- a/src/dsp/yuv.h
+++ b/src/dsp/yuv.h
@@ -38,16 +38,6 @@
 #include "./dsp.h"
 #include "../dec/vp8_dec.h"
 
-#if defined(WEBP_EXPERIMENTAL_FEATURES)
-// Do NOT activate this feature for real compression. This is only experimental!
-// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
-// This colorspace is close to Rec.601's Y'CbCr model with the notable
-// difference of allowing larger range for luma/chroma.
-// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
-// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
-// #define USE_YUVj
-#endif
-
 //------------------------------------------------------------------------------
 // YUV -> RGB conversion
 
@@ -111,7 +101,7 @@ static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
   const int b = VP8YUVToB(y, u);      // 5 usable bits
   const int rg = (r & 0xf8) | (g >> 5);
   const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   rgb[0] = gb;
   rgb[1] = rg;
 #else
@@ -127,7 +117,7 @@ static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
   const int b = VP8YUVToB(y, u);        // 4 usable bits
   const int rg = (r & 0xf0) | (g >> 4);
   const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   argb[0] = ba;
   argb[1] = rg;
 #else
@@ -157,9 +147,6 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
   rgba[3] = 0xff;
 }
 
-// Must be called before everything, to initialize the tables.
-void VP8YUVInit(void);
-
 //-----------------------------------------------------------------------------
 // SSE2 extra functions (mostly for upsampling_sse2.c)
 
@@ -192,8 +179,6 @@ static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
   return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
 }
 
-#ifndef USE_YUVj
-
 static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
   const int luma = 16839 * r + 33059 * g + 6420 * b;
   return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX;  // no need to clip
@@ -209,28 +194,6 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
   return VP8ClipUV(v, rounding);
 }
 
-#else
-
-// This JPEG-YUV colorspace, only for comparison!
-// These are also 16bit precision coefficients from Rec.601, but with full
-// [0..255] output range.
-static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
-  const int luma = 19595 * r + 38470 * g + 7471 * b;
-  return (luma + rounding) >> YUV_FIX;  // no need to clip
-}
-
-static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
-  const int u = -11058 * r - 21710 * g + 32768 * b;
-  return VP8ClipUV(u, rounding);
-}
-
-static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
-  const int v = 32768 * r - 27439 * g - 5329 * b;
-  return VP8ClipUV(v, rounding);
-}
-
-#endif    // USE_YUVj
-
 #ifdef __cplusplus
 }    // extern "C"
 #endif
diff --git a/src/dsp/yuv_sse2.c b/src/dsp/yuv_sse2.c
index f82a2138..ea0bd863 100644
--- a/src/dsp/yuv_sse2.c
+++ b/src/dsp/yuv_sse2.c
@@ -119,7 +119,7 @@ static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
                                          const __m128i* const B,
                                          const __m128i* const A,
                                          uint8_t* const dst) {
-#if !defined(WEBP_SWAP_16BIT_CSP)
+#if (WEBP_SWAP_16BIT_CSP == 0)
   const __m128i rg0 = _mm_packus_epi16(*R, *G);
   const __m128i ba0 = _mm_packus_epi16(*B, *A);
 #else
@@ -149,7 +149,7 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
   const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
   const __m128i rg = _mm_or_si128(r1, g1);
   const __m128i gb = _mm_or_si128(g2, b1);
-#if !defined(WEBP_SWAP_16BIT_CSP)
+#if (WEBP_SWAP_16BIT_CSP == 0)
   const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb);
 #else
   const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg);