diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 8b0fa70a..01c3274f 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -712,13 +712,15 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
   uint32_t* const rows_out = dec->argb_cache_;
 
   // Inverse transforms.
-  // TODO: most transforms only need to operate on the cropped region only.
-  memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
   while (n-- > 0) {
     VP8LTransform* const transform = &dec->transforms_[n];
     VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
     rows_in = rows_out;
   }
+  if (rows_in != rows_out) {
+    // No transform called, hence just copy.
+    memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
+  }
 }
 
 // Processes (transforms, scales & color-converts) the rows decoded after the
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 1a4c3ac6..2f438731 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -234,15 +234,16 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
 
 // Add green to blue and red channels (i.e. perform the inverse transform of
 // 'subtract green').
-void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
+void VP8LAddGreenToBlueAndRed_C(const uint32_t* const src, int num_pixels,
+                                uint32_t* dst) {
   int i;
   for (i = 0; i < num_pixels; ++i) {
-    const uint32_t argb = data[i];
+    const uint32_t argb = src[i];
     const uint32_t green = ((argb >> 8) & 0xff);
     uint32_t red_blue = (argb & 0x00ff00ffu);
     red_blue += (green << 16) | green;
     red_blue &= 0x00ff00ffu;
-    data[i] = (argb & 0xff00ff00u) | red_blue;
+    dst[i] = (argb & 0xff00ff00u) | red_blue;
   }
 }
 
@@ -258,11 +259,12 @@ static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
   m->red_to_blue_   = (color_code >> 16) & 0xff;
 }
 
-void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
-                                 int num_pixels) {
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
+                                 const uint32_t* const src, int num_pixels,
+                                 uint32_t* const dst) {
   int i;
   for (i = 0; i < num_pixels; ++i) {
-    const uint32_t argb = data[i];
+    const uint32_t argb = src[i];
     const uint32_t green = argb >> 8;
     const uint32_t red = argb >> 16;
     int new_red = red;
@@ -272,13 +274,14 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
     new_blue += ColorTransformDelta(m->green_to_blue_, green);
     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
     new_blue &= 0xff;
-    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
+    dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
   }
 }
 
 // Color space inverse transform.
 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
-                                       int y_start, int y_end, uint32_t* data) {
+                                       int y_start, int y_end,
+                                       const uint32_t* src, uint32_t* dst) {
   const int width = transform->xsize_;
   const int tile_width = 1 << transform->bits_;
   const int mask = tile_width - 1;
@@ -292,17 +295,19 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
   while (y < y_end) {
     const uint32_t* pred = pred_row;
     VP8LMultipliers m = { 0, 0, 0 };
-    const uint32_t* const data_safe_end = data + safe_width;
-    const uint32_t* const data_end = data + width;
-    while (data < data_safe_end) {
+    const uint32_t* const src_safe_end = src + safe_width;
+    const uint32_t* const src_end = src + width;
+    while (src < src_safe_end) {
       ColorCodeToMultipliers(*pred++, &m);
-      VP8LTransformColorInverse(&m, data, tile_width);
-      data += tile_width;
+      VP8LTransformColorInverse(&m, src, tile_width, dst);
+      src += tile_width;
+      dst += tile_width;
     }
-    if (data < data_end) {  // Left-overs using C-version.
+    if (src < src_end) {  // Left-overs using C-version.
       ColorCodeToMultipliers(*pred++, &m);
-      VP8LTransformColorInverse(&m, data, remaining_width);
-      data += remaining_width;
+      VP8LTransformColorInverse(&m, src, remaining_width, dst);
+      src += remaining_width;
+      dst += remaining_width;
     }
     ++y;
     if ((y & mask) == 0) pred_row += tiles_per_row;
@@ -367,9 +372,13 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
   assert(row_end <= transform->ysize_);
   switch (transform->type_) {
     case SUBTRACT_GREEN:
-      VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
+      VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
       break;
     case PREDICTOR_TRANSFORM:
+      // TODO(vrabaud): parallelize transform predictors.
+      if (in != out) {
+        memcpy(out, in, (row_end - row_start) * width * sizeof(*out));
+      }
       PredictorInverseTransform(transform, row_start, row_end, out);
       if (row_end != transform->ysize_) {
         // The last predicted row in this iteration will be the top-pred row
@@ -379,7 +388,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
       }
       break;
     case CROSS_COLOR_TRANSFORM:
-      ColorSpaceInverseTransform(transform, row_start, row_end, out);
+      ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
       break;
     case COLOR_INDEXING_TRANSFORM:
       if (in == out && transform->bits_ > 0) {
@@ -556,10 +565,10 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
 
 //------------------------------------------------------------------------------
 
-VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
 VP8LPredictorFunc VP8LPredictors[16];
 
-VP8LTransformColorFunc VP8LTransformColorInverse;
+VP8LTransformColorInverseFunc VP8LTransformColorInverse;
 
 VP8LConvertFunc VP8LConvertBGRAToRGB;
 VP8LConvertFunc VP8LConvertBGRAToRGBA;
diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
index d9d821e3..7a95024a 100644
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -35,8 +35,9 @@ extern "C" {
 typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
 extern VP8LPredictorFunc VP8LPredictors[16];
 
-typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
-extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
+                                             int num_pixels, uint32_t* dst);
+extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
 
 typedef struct {
   // Note: the members are uint8_t, so that any negative values are
@@ -45,9 +46,10 @@ typedef struct {
   uint8_t green_to_blue_;
   uint8_t red_to_blue_;
 } VP8LMultipliers;
-typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
-                                       uint32_t* argb_data, int num_pixels);
-extern VP8LTransformColorFunc VP8LTransformColorInverse;
+typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
+                                              const uint32_t* src,
+                                              int num_pixels, uint32_t* dst);
+extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
 
 struct VP8LTransform;  // Defined in dec/vp8li.h.
 
@@ -93,7 +95,8 @@ void VP8LColorIndexInverseTransformAlpha(
 
 // Expose some C-only fallback functions
 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
-                                 uint32_t* data, int num_pixels);
+                                 const uint32_t* src, int num_pixels,
+                                 uint32_t* dst);
 
 void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
 void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
@@ -102,7 +105,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                                int num_pixels, uint8_t* dst);
 void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
-void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels);
+void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
+                                uint32_t* dst);
 
 // Must be called before calling any of the above methods.
 void VP8LDspInit(void);
@@ -110,7 +114,10 @@ void VP8LDspInit(void);
 //------------------------------------------------------------------------------
 // Encoding
 
-extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
+extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
+                                       uint32_t* const dst, int num_pixels);
 extern VP8LTransformColorFunc VP8LTransformColor;
 typedef void (*VP8LCollectColorBlueTransformsFunc)(
     const uint32_t* argb, int stride,
diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index 932f7ec6..58c6fc78 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -665,7 +665,7 @@ static void HistogramAdd(const VP8LHistogram* const a,
 
 //------------------------------------------------------------------------------
 
-VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
 
 VP8LTransformColorFunc VP8LTransformColor;
 
diff --git a/src/dsp/lossless_mips_dsp_r2.c b/src/dsp/lossless_mips_dsp_r2.c
index 694c1324..2984ce8d 100644
--- a/src/dsp/lossless_mips_dsp_r2.c
+++ b/src/dsp/lossless_mips_dsp_r2.c
@@ -228,25 +228,27 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
 
 // Add green to blue and red channels (i.e. perform the inverse transform of
 // 'subtract green').
-static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
+static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
+                                 uint32_t* dst) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  uint32_t* const p_loop1_end = data + (num_pixels & ~3);
-  uint32_t* const p_loop2_end = data + num_pixels;
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
+  const uint32_t* const p_loop2_end = src + num_pixels;
   __asm__ volatile (
     ".set       push                                          \n\t"
     ".set       noreorder                                     \n\t"
-    "beq        %[data],         %[p_loop1_end],     3f       \n\t"
+    "beq        %[src],          %[p_loop1_end],     3f       \n\t"
     " nop                                                     \n\t"
   "0:                                                         \n\t"
-    "lw         %[temp0],        0(%[data])                   \n\t"
-    "lw         %[temp1],        4(%[data])                   \n\t"
-    "lw         %[temp2],        8(%[data])                   \n\t"
-    "lw         %[temp3],        12(%[data])                  \n\t"
+    "lw         %[temp0],        0(%[src])                    \n\t"
+    "lw         %[temp1],        4(%[src])                    \n\t"
+    "lw         %[temp2],        8(%[src])                    \n\t"
+    "lw         %[temp3],        12(%[src])                   \n\t"
     "ext        %[temp4],        %[temp0],           8,    8  \n\t"
     "ext        %[temp5],        %[temp1],           8,    8  \n\t"
     "ext        %[temp6],        %[temp2],           8,    8  \n\t"
     "ext        %[temp7],        %[temp3],           8,    8  \n\t"
-    "addiu      %[data],         %[data],            16       \n\t"
+    "addiu      %[src],          %[src],             16       \n\t"
+    "addiu      %[dst],          %[dst],             16       \n\t"
     "replv.ph   %[temp4],        %[temp4]                     \n\t"
     "replv.ph   %[temp5],        %[temp5]                     \n\t"
     "replv.ph   %[temp6],        %[temp6]                     \n\t"
@@ -255,44 +257,47 @@ static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
     "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
     "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
     "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
-    "sw         %[temp0],        -16(%[data])                 \n\t"
-    "sw         %[temp1],        -12(%[data])                 \n\t"
-    "sw         %[temp2],        -8(%[data])                  \n\t"
-    "bne        %[data],         %[p_loop1_end],     0b       \n\t"
-    " sw        %[temp3],        -4(%[data])                  \n\t"
+    "sw         %[temp0],        -16(%[dst])                  \n\t"
+    "sw         %[temp1],        -12(%[dst])                  \n\t"
+    "sw         %[temp2],        -8(%[dst])                   \n\t"
+    "bne        %[src],          %[p_loop1_end],     0b       \n\t"
+    " sw        %[temp3],        -4(%[dst])                   \n\t"
   "3:                                                         \n\t"
-    "beq        %[data],         %[p_loop2_end],     2f       \n\t"
+    "beq        %[src],          %[p_loop2_end],     2f       \n\t"
     " nop                                                     \n\t"
   "1:                                                         \n\t"
-    "lw         %[temp0],        0(%[data])                   \n\t"
-    "addiu      %[data],         %[data],            4        \n\t"
+    "lw         %[temp0],        0(%[src])                    \n\t"
+    "addiu      %[src],          %[src],             4        \n\t"
+    "addiu      %[dst],          %[dst],             4        \n\t"
     "ext        %[temp4],        %[temp0],           8,    8  \n\t"
     "replv.ph   %[temp4],        %[temp4]                     \n\t"
     "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
-    "bne        %[data],         %[p_loop2_end],     1b       \n\t"
-    " sw        %[temp0],        -4(%[data])                  \n\t"
+    "bne        %[src],          %[p_loop2_end],     1b       \n\t"
+    " sw        %[temp0],        -4(%[dst])                   \n\t"
   "2:                                                         \n\t"
     ".set       pop                                           \n\t"
-    : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
-      [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
+    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
+      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+      [temp7]"=&r"(temp7)
     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
     : "memory"
   );
 }
 
 static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  uint32_t* data, int num_pixels) {
+                                  const uint32_t* src, int num_pixels,
+                                  uint32_t* dst) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   uint32_t argb, argb1, new_red;
   const uint32_t G_to_R = m->green_to_red_;
   const uint32_t G_to_B = m->green_to_blue_;
   const uint32_t R_to_B = m->red_to_blue_;
-  uint32_t* const p_loop_end = data + (num_pixels & ~1);
+  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
   __asm__ volatile (
     ".set            push                                    \n\t"
     ".set            noreorder                               \n\t"
-    "beq             %[data],      %[p_loop_end],  1f        \n\t"
+    "beq             %[src],       %[p_loop_end],  1f        \n\t"
     " nop                                                    \n\t"
     "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
     "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
@@ -304,9 +309,12 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
     "shra.ph         %[temp1],     %[temp1],       8         \n\t"
     "shra.ph         %[temp2],     %[temp2],       8         \n\t"
   "0:                                                        \n\t"
-    "lw              %[argb],      0(%[data])                \n\t"
-    "lw              %[argb1],     4(%[data])                \n\t"
-    "addiu           %[data],      %[data],        8         \n\t"
+    "lw              %[argb],      0(%[src])                 \n\t"
+    "lw              %[argb1],     4(%[src])                 \n\t"
+    "sw              %[argb],      0(%[dst])                 \n\t"
+    "sw              %[argb1],     4(%[dst])                 \n\t"
+    "addiu           %[src],       %[src],         8         \n\t"
+    "addiu           %[dst],       %[dst],         8         \n\t"
     "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
     "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
     "shll.ph         %[temp3],     %[temp3],       8         \n\t"
@@ -323,29 +331,29 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
     "shll.ph         %[temp4],     %[temp5],       8         \n\t"
     "shra.ph         %[temp4],     %[temp4],       8         \n\t"
     "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
-    "sb              %[temp5],     -2(%[data])               \n\t"
+    "sb              %[temp5],     -2(%[dst])                \n\t"
     "sra             %[temp5],     %[temp5],       16        \n\t"
     "shra.ph         %[temp4],     %[temp4],       5         \n\t"
     "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
     "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
-    "sb              %[temp5],     -6(%[data])               \n\t"
-    "sb              %[temp3],     -4(%[data])               \n\t"
+    "sb              %[temp5],     -6(%[dst])                \n\t"
+    "sb              %[temp3],     -4(%[dst])                \n\t"
     "sra             %[temp3],     %[temp3],       16        \n\t"
-    "bne             %[data],      %[p_loop_end],  0b        \n\t"
-    " sb             %[temp3],     -8(%[data])               \n\t"
+    "bne             %[src],       %[p_loop_end],  0b        \n\t"
+    " sb             %[temp3],     -8(%[dst])                \n\t"
   "1:                                                        \n\t"
     ".set            pop                                     \n\t"
     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
       [new_red]"=&r"(new_red), [argb]"=&r"(argb),
-      [argb1]"=&r"(argb1), [data]"+&r"(data)
+      [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
     : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
       [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
     : "memory", "hi", "lo"
   );
 
   // Fall-back to C-version for left-overs.
-  if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
+  if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
 }
 
 static void ConvertBGRAToRGB(const uint32_t* src,
diff --git a/src/dsp/lossless_msa.c b/src/dsp/lossless_msa.c
index 26c5ab62..f6dd5649 100644
--- a/src/dsp/lossless_msa.c
+++ b/src/dsp/lossless_msa.c
@@ -244,44 +244,51 @@ static void ConvertBGRAToRGB(const uint32_t* src,
   }
 }
 
-static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
+static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
+                                 uint32_t* dst) {
   int i;
-  uint8_t* ptemp_data = (uint8_t*)data;
+  const uint8_t* in = (const uint8_t*)src;
+  uint8_t* out = (uint8_t*)dst;
   v16u8 src0, dst0, tmp0;
   const v16u8 mask = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
                        13, 255, 13, 255 };
 
   while (num_pixels >= 8) {
     v16u8 src1, dst1, tmp1;
-    LD_UB2(ptemp_data, 16, src0, src1);
+    LD_UB2(in, 16, src0, src1);
     VSHF_B2_UB(src0, src1, src1, src0, mask, mask, tmp0, tmp1);
     ADD2(src0, tmp0, src1, tmp1, dst0, dst1);
-    ST_UB2(dst0, dst1, ptemp_data, 16);
-    ptemp_data += 32;
+    ST_UB2(dst0, dst1, out, 16);
+    in += 32;
+    out += 32;
     num_pixels -= 8;
   }
   if (num_pixels > 0) {
     if (num_pixels >= 4) {
-      src0 = LD_UB(ptemp_data);
+      src0 = LD_UB(in);
       tmp0 = VSHF_UB(src0, src0, mask);
       dst0 = src0 + tmp0;
-      ST_UB(dst0, ptemp_data);
-      ptemp_data += 16;
+      ST_UB(dst0, out);
+      in += 16;
+      out += 16;
       num_pixels -= 4;
     }
     for (i = 0; i < num_pixels; i++) {
-      const uint8_t b = ptemp_data[0];
-      const uint8_t g = ptemp_data[1];
-      const uint8_t r = ptemp_data[2];
-      ptemp_data[0] = (b + g) & 0xff;
-      ptemp_data[2] = (r + g) & 0xff;
-      ptemp_data += 4;
+      // Only red and blue change; green and alpha are copied as-is.
+      const uint8_t b = in[4 * i + 0];
+      const uint8_t g = in[4 * i + 1];
+      const uint8_t r = in[4 * i + 2];
+      out[4 * i + 0] = (b + g) & 0xff;
+      out[4 * i + 1] = g;
+      out[4 * i + 2] = (r + g) & 0xff;
+      out[4 * i + 3] = in[4 * i + 3];
     }
   }
 }
 
 static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  uint32_t* data, int num_pixels) {
+                                  const uint32_t* src, int num_pixels,
+                                  uint32_t* dst) {
   v16u8 src0, dst0;
   const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
                                          (m->green_to_red_ << 16));
@@ -293,34 +300,36 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
 
   while (num_pixels >= 8) {
     v16u8 src1, dst1;
-    LD_UB2(data, 4, src0, src1);
+    LD_UB2(src, 4, src0, src1);
     TRANSFORM_COLOR_INVERSE_8(src0, src1, dst0, dst1, g2br, r2b, mask0, mask1);
-    ST_UB2(dst0, dst1, data, 4);
-    data += 8;
+    ST_UB2(dst0, dst1, dst, 4);
+    src += 8;
+    dst += 8;
     num_pixels -= 8;
   }
   if (num_pixels > 0) {
     if (num_pixels >= 4) {
-      src0 = LD_UB(data);
+      src0 = LD_UB(src);
       TRANSFORM_COLOR_INVERSE_4(src0, dst0, g2br, r2b, mask0, mask1);
-      ST_UB(dst0, data);
-      data += 4;
+      ST_UB(dst0, dst);
+      src += 4;
+      dst += 4;
       num_pixels -= 4;
     }
     if (num_pixels > 0) {
-      src0 = LD_UB(data);
+      src0 = LD_UB(src);
       TRANSFORM_COLOR_INVERSE_4(src0, dst0, g2br, r2b, mask0, mask1);
       if (num_pixels == 3) {
         const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
         const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 2);
-        SD(pix_d, data + 0);
-        SW(pix_w, data + 2);
+        SD(pix_d, dst + 0);
+        SW(pix_w, dst + 2);
       } else if (num_pixels == 2) {
         const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
-        SD(pix_d, data);
+        SD(pix_d, dst);
       } else {
         const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 0);
-        SW(pix_w, data);
+        SW(pix_w, dst);
       }
     }
   }
diff --git a/src/dsp/lossless_neon.c b/src/dsp/lossless_neon.c
index 6faccb8f..509f0c52 100644
--- a/src/dsp/lossless_neon.c
+++ b/src/dsp/lossless_neon.c
@@ -171,28 +171,30 @@ static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
 }
 #endif  // USE_VTBLQ
 
-static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
-  const uint32_t* const end = argb_data + (num_pixels & ~3);
+static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
+                                 uint32_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~3);
 #ifdef USE_VTBLQ
   const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
 #else
   const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
 #endif
-  for (; argb_data < end; argb_data += 4) {
-    const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
+  for (; src < end; src += 4, dst += 4) {
+    const uint8x16_t argb = vld1q_u8((const uint8_t*)src);
     const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
-    vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
+    vst1q_u8((uint8_t*)dst, vaddq_u8(argb, greens));
   }
   // fallthrough and finish off with plain-C
-  VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
+  VP8LAddGreenToBlueAndRed_C(src, num_pixels & 3, dst);
 }
 
 //------------------------------------------------------------------------------
 // Color Transform
 
 static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  uint32_t* argb_data, int num_pixels) {
-  // sign-extended multiplying constants, pre-shifted by 6.
+                                  const uint32_t* const src, int num_pixels,
+                                  uint32_t* dst) {
+  // sign-extended multiplying constants, pre-shifted by 6.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 6)
   const int16_t rb[8] = {
     CST(green_to_blue_), CST(green_to_red_),
@@ -219,7 +221,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
   const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u);
   int i;
   for (i = 0; i + 4 <= num_pixels; i += 4) {
-    const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
+    const uint8x16_t in = vld1q_u8((const uint8_t*)(src + i));
     const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);
     // 0 g 0 g
     const uint8x16_t greens = DoGreenShuffle(in, shuffle);
@@ -240,10 +242,10 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
     // 0  r'  0  b''
     const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8);
     const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0);
-    vst1q_u32(argb_data + i, out);
+    vst1q_u32(dst + i, out);
   }
   // Fall-back to C-version for left-overs.
-  VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+  VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
 }
 
 #undef USE_VTBLQ
diff --git a/src/dsp/lossless_sse2.c b/src/dsp/lossless_sse2.c
index 92dbdea9..fb09fe0c 100644
--- a/src/dsp/lossless_sse2.c
+++ b/src/dsp/lossless_sse2.c
@@ -157,26 +157,28 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
 
-static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
+                                 uint32_t* dst) {
   int i;
   for (i = 0; i + 4 <= num_pixels; i += 4) {
-    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
+    const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
     const __m128i A = _mm_srli_epi16(in, 8);     // 0 a 0 g
     const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
     const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0));  // 0g0g
     const __m128i out = _mm_add_epi8(in, C);
-    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+    _mm_storeu_si128((__m128i*)&dst[i], out);
   }
   // fallthrough and finish off with plain-C
-  VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i);
+  VP8LAddGreenToBlueAndRed_C(src + i, num_pixels - i, dst + i);
 }
 
 //------------------------------------------------------------------------------
 // Color Transform
 
 static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  uint32_t* argb_data, int num_pixels) {
-  // sign-extended multiplying constants, pre-shifted by 5.
+                                  const uint32_t* const src, int num_pixels,
+                                  uint32_t* dst) {
+  // sign-extended multiplying constants, pre-shifted by 5.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
   const __m128i mults_rb = _mm_set_epi16(
       CST(green_to_red_), CST(green_to_blue_),
@@ -190,7 +192,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
   const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);  // alpha-green masks
   int i;
   for (i = 0; i + 4 <= num_pixels; i += 4) {
-    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
+    const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
     const __m128i A = _mm_and_si128(in, mask_ag);     // a   0   g   0
     const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
     const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0));  // g0g0
@@ -202,10 +204,10 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
     const __m128i I = _mm_add_epi8(H, F);              // r' x  b'' 0
     const __m128i J = _mm_srli_epi16(I, 8);            // 0  r'  0  b''
     const __m128i out = _mm_or_si128(J, A);
-    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+    _mm_storeu_si128((__m128i*)&dst[i], out);
   }
   // Fall-back to C-version for left-overs.
-  VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+  VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
 }
 
 //------------------------------------------------------------------------------