From a0a488554deb95c596611c498af9ac187bd61425 Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Thu, 9 Aug 2012 02:44:38 +0530
Subject: [PATCH] Lossless decoder fix for a special transform order

Fix the lossless decoder for the case when it has to apply other
inverse transforms before applying the color indexing inverse transform.

The main idea is to make ColorIndexInverseTransform virtually in-place:
we use the fact that argb_cache_ is allocated to accommodate all
*unpacked* pixels of a macro-row, not just *packed* pixels.

Change-Id: I27f11f3043f863dfd753cc2580bc5b36376800c4
---
 src/dec/vp8l.c     | 14 ++++++++------
 src/dsp/lossless.c | 16 +++++++++++++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 39fa4d10..ab8d2e3b 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -615,20 +615,22 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
 
 typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
 
-static void ApplyTransforms(VP8LDecoder* const dec, int num_rows,
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
                             const uint32_t* const rows) {
   int n = dec->next_transform_;
   const int cache_pixs = dec->width_ * num_rows;
-  uint32_t* rows_data = dec->argb_cache_;
   const int start_row = dec->last_row_;
   const int end_row = start_row + num_rows;
+  const uint32_t* rows_in = rows;
+  uint32_t* const rows_out = dec->argb_cache_;
 
   // Inverse transforms.
   // TODO: most transforms only need to operate on the cropped region only.
-  memcpy(rows_data, rows, cache_pixs * sizeof(*rows_data));
+  memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
   while (n-- > 0) {
     VP8LTransform* const transform = &dec->transforms_[n];
-    VP8LInverseTransform(transform, start_row, end_row, rows, rows_data);
+    VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
+    rows_in = rows_out;
   }
 }
 
@@ -639,7 +641,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   const int num_rows = row - dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyTransforms(dec, num_rows, rows);
+  ApplyInverseTransforms(dec, num_rows, rows);
 
   // Emit output.
   {
@@ -1066,7 +1068,7 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
   const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyTransforms(dec, num_rows, in);
+  ApplyInverseTransforms(dec, num_rows, in);
 
   // Extract alpha (which is stored in the green plane).
   {
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 472e641e..00f8399a 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -935,7 +935,7 @@ static void ColorIndexInverseTransform(
       uint32_t packed_pixels = 0;
       int x;
       for (x = 0; x < width; ++x) {
-        // We need to load fresh 'packed_pixels' once every 'bytes_per_pixels'
+        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
         // increments of x. Fortunately, pixels_per_byte is a power of 2, so
         // can just use a mask for that, instead of decrementing a counter.
         if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
@@ -976,7 +976,21 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
       ColorSpaceInverseTransform(transform, row_start, row_end, out);
       break;
     case COLOR_INDEXING_TRANSFORM:
+      if (in == out && transform->bits_ > 0) {
+        // Move packed pixels to the end of unpacked region, so that unpacking
+        // can occur seamlessly.
+        // Also, note that this is the only transform that applies on
+        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+        // transforms work on effective width of xsize_.
+        const int out_stride = (row_end - row_start) * transform->xsize_;
+        const int in_stride = (row_end - row_start) *
+            VP8LSubSampleSize(transform->xsize_, transform->bits_);
+        uint32_t* const src = out + out_stride - in_stride;
+        memmove(src, out, in_stride * sizeof(*src));
+        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+      } else {
       ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+      }
       break;
   }
 }