From 068db59e2690523225170ba5d05f399b74b9e1ed Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Tue, 11 Jun 2013 16:04:54 -0700
Subject: [PATCH] Intertwined decoding of alpha and RGB

This will reduce the time to first decoded pixel.

Change-Id: I07b900c0ed4af3aac806b2731e11cd18ec16d016
---
 NEWS                |   3 +
 makefile.unix       |   1 +
 src/dec/Makefile.am |   1 +
 src/dec/alpha.c     | 147 ++++++++++++++++++++++++++++++--------------
 src/dec/alphai.h    |  55 +++++++++++++++++
 src/dec/frame.c     |   7 +--
 src/dec/idec.c      |  18 +++++-
 src/dec/vp8.c       |   3 +
 src/dec/vp8i.h      |   6 +-
 src/dec/vp8l.c      |  73 ++++++++++++++--------
 src/dec/vp8li.h     |  24 +++++---
 11 files changed, 250 insertions(+), 88 deletions(-)
 create mode 100644 src/dec/alphai.h

diff --git a/NEWS b/NEWS
index 8615854f..469b66b8 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,9 @@
   * Add incremental decoding support for images containing ALPH and ICCP chunks.
   * New function: WebPMuxGetCanvasSize
   * BMP and TIFF format output added to 'dwebp'
+  * Significant memory reduction for decoding lossy images with alpha.
+  * Intertwined decoding of RGB and alpha for a shorter
+    time-to-first-decoded-pixel.
 
 - 3/20/13: version 0.3.0
   This is a binary compatible release.
diff --git a/makefile.unix b/makefile.unix
index 6c787f54..ace67a68 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -176,6 +176,7 @@ HDRS_INSTALLED = \
     src/webp/types.h \
 
 HDRS = \
+    src/dec/alphai.h \
     src/dec/decode_vp8.h \
     src/dec/vp8i.h \
     src/dec/vp8li.h \
diff --git a/src/dec/Makefile.am b/src/dec/Makefile.am
index 06142063..0b5c29a6 100644
--- a/src/dec/Makefile.am
+++ b/src/dec/Makefile.am
@@ -3,6 +3,7 @@ noinst_LTLIBRARIES = libwebpdecode.la
 
 libwebpdecode_la_SOURCES =
 libwebpdecode_la_SOURCES += alpha.c
+libwebpdecode_la_SOURCES += alphai.h
 libwebpdecode_la_SOURCES += buffer.c
 libwebpdecode_la_SOURCES += decode_vp8.h
 libwebpdecode_la_SOURCES += frame.c
diff --git a/src/dec/alpha.c b/src/dec/alpha.c
index d91345db..70e13bcc 100644
--- a/src/dec/alpha.c
+++ b/src/dec/alpha.c
@@ -12,9 +12,9 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
+#include "./alphai.h"
 #include "./vp8i.h"
 #include "./vp8li.h"
-#include "../utils/filters.h"
 #include "../utils/quant_levels_dec.h"
 #include "../webp/format_constants.h"
 
@@ -23,87 +23,140 @@ extern "C" {
 #endif
 
 //------------------------------------------------------------------------------
-// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
-// The 'output' buffer should be pre-allocated and must be of the same
-// dimension 'height'x'width', as that of the image.
-//
-// Returns 1 on successfully decoding the compressed alpha and
-//         0 if either:
-//           error in bit-stream header (invalid compression mode or filter), or
-//           error returned by appropriate compression method.
+// ALPHDecoder object.
 
-static int DecodeAlpha(const uint8_t* data, size_t data_size,
-                       int width, int height, uint8_t* output) {
-  WEBP_FILTER_TYPE filter;
-  int pre_processing;
-  int rsrv;
+ALPHDecoder* ALPHNew(void) {
+  ALPHDecoder* const dec = (ALPHDecoder*)calloc(1, sizeof(*dec));
+  return dec;
+}
+
+void ALPHDelete(ALPHDecoder* const dec) {
+  if (dec != NULL) {
+    VP8LDelete(dec->vp8l_dec_);
+    dec->vp8l_dec_ = NULL;
+    free(dec);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Decoding.
+
+// Initializes alpha decoding by parsing the alpha header and decoding the image
+// header for alpha data stored using lossless compression.
+// Returns false in case of error in alpha header (data too short, invalid
+// compression method or filter, error in lossless header data, etc.).
+static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
+                    size_t data_size, int width, int height, uint8_t* output) {
   int ok = 0;
-  int method;
   const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
   const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
+  int rsrv;
 
   assert(width > 0 && height > 0);
   assert(data != NULL && output != NULL);
 
+  dec->width_ = width;
+  dec->height_ = height;
+
   if (data_size <= ALPHA_HEADER_LEN) {
     return 0;
   }
 
-  method = (data[0] >> 0) & 0x03;
-  filter = (data[0] >> 2) & 0x03;
-  pre_processing = (data[0] >> 4) & 0x03;
+  dec->method_ = (data[0] >> 0) & 0x03;
+  dec->filter_ = (data[0] >> 2) & 0x03;
+  dec->pre_processing_ = (data[0] >> 4) & 0x03;
   rsrv = (data[0] >> 6) & 0x03;
-  if (method < ALPHA_NO_COMPRESSION ||
-      method > ALPHA_LOSSLESS_COMPRESSION ||
-      filter >= WEBP_FILTER_LAST ||
-      pre_processing > ALPHA_PREPROCESSED_LEVELS ||
+  if (dec->method_ < ALPHA_NO_COMPRESSION ||
+      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
+      dec->filter_ >= WEBP_FILTER_LAST ||
+      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
       rsrv != 0) {
     return 0;
   }
 
-  if (method == ALPHA_NO_COMPRESSION) {
-    const size_t alpha_decoded_size = width * height;
+  if (dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t alpha_decoded_size = dec->width_ * dec->height_;
     ok = (alpha_data_size >= alpha_decoded_size);
-    if (ok) memcpy(output, alpha_data, alpha_decoded_size);
   } else {
-    ok = VP8LDecodeAlphaImageStream(width, height, alpha_data, alpha_data_size,
-                                    output);
+    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
   }
-
-  if (ok) {
-    WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
-    if (unfilter_func != NULL) {
-      // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
-      // and apply filter per image-row.
-      unfilter_func(width, height, width, 0, height, output);
-    }
-    if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
-      ok = DequantizeLevels(output, width, height, 0, height);
-    }
-  }
-
   return ok;
 }
 
+// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
+// starting from row number 'row'. It assumes that rows up to (row - 1) have
+// already been decoded.
+// Returns false in case of bitstream error.
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
+  ALPHDecoder* const alph_dec = dec->alph_dec_;
+  const int width = alph_dec->width_;
+  const int height = alph_dec->height_;
+  WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];
+  uint8_t* const output = dec->alpha_plane_;
+  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t offset = row * width;
+    const size_t num_pixels = num_rows * width;
+    assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);
+    memcpy(dec->alpha_plane_ + offset,
+           dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);
+  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
+    assert(alph_dec->vp8l_dec_ != NULL);
+    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
+      return 0;
+    }
+  }
+
+  if (unfilter_func != NULL) {
+    unfilter_func(width, height, width, row, num_rows, output);
+  }
+
+  if (alph_dec->pre_processing_ == ALPHA_PREPROCESSED_LEVELS) {
+    if (!DequantizeLevels(output, width, height, row, num_rows)) {
+      return 0;
+    }
+  }
+
+  if (row + num_rows == dec->pic_hdr_.height_) {
+    dec->is_alpha_decoded_ = 1;
+  }
+  return 1;
+}
+
 //------------------------------------------------------------------------------
+// Main entry point.
 
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
                                       int row, int num_rows) {
   const int width = dec->pic_hdr_.width_;
   const int height = dec->pic_hdr_.height_;
 
-  if (row < 0 || num_rows < 0 || row + num_rows > height) {
+  if (row < 0 || num_rows <= 0 || row + num_rows > height) {
     return NULL;    // sanity check.
   }
 
   if (row == 0) {
-    // Decode everything during the first call.
-    assert(!dec->is_alpha_decoded_);
-    if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
-                     width, height, dec->alpha_plane_)) {
-      return NULL;  // Error.
+    // Initialize decoding.
+    assert(dec->alpha_plane_ != NULL);
+    dec->alph_dec_ = ALPHNew();
+    if (dec->alph_dec_ == NULL) return NULL;
+    if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
+                  width, height, dec->alpha_plane_)) {
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+      return NULL;
     }
-    dec->is_alpha_decoded_ = 1;
+  }
+
+  if (!dec->is_alpha_decoded_) {
+    int ok = 0;
+    assert(dec->alph_dec_ != NULL);
+    ok = ALPHDecode(dec, row, num_rows);
+    if (!ok || dec->is_alpha_decoded_) {
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+    }
+    if (!ok) return NULL;  // Error.
   }
 
   // Return a pointer to the current decoded row.
diff --git a/src/dec/alphai.h b/src/dec/alphai.h
new file mode 100644
index 00000000..e742a671
--- /dev/null
+++ b/src/dec/alphai.h
@@ -0,0 +1,55 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha decoder: internal header.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_DEC_ALPHAI_H_
+#define WEBP_DEC_ALPHAI_H_
+
+#include "./webpi.h"
+#include "../utils/filters.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct VP8LDecoder;  // Defined in dec/vp8li.h.
+
+typedef struct ALPHDecoder ALPHDecoder;
+struct ALPHDecoder {
+  int width_;
+  int height_;
+  int method_;
+  WEBP_FILTER_TYPE filter_;
+  int pre_processing_;
+  struct VP8LDecoder* vp8l_dec_;
+  VP8Io io_;
+  size_t bytes_per_pixel_;  // Although alpha channel requires only 1 byte per
+                            // pixel, sometimes VP8LDecoder may need to allocate
+                            // 4 bytes per pixel internally during decode.
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Allocates a new alpha decoder instance.
+ALPHDecoder* ALPHNew(void);
+
+// Clears and deallocates an alpha decoder instance.
+void ALPHDelete(ALPHDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_ALPHAI_H_ */
diff --git a/src/dec/frame.c b/src/dec/frame.c
index 4cceb317..92c23185 100644
--- a/src/dec/frame.c
+++ b/src/dec/frame.c
@@ -201,11 +201,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
     }
     io->a = NULL;
     if (dec->alpha_data_ != NULL && y_start < y_end) {
-      // TODO(skal): several things to correct here:
-      // * testing presence of alpha with dec->alpha_data_ is not a good idea
-      // * we're actually decompressing the full plane only once. It should be
-      //   more obvious from signature.
-      // * we could free alpha_data_ right after this call, but we don't own.
+      // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
+      // good idea.
       io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
       if (io->a == NULL) {
         return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
diff --git a/src/dec/idec.c b/src/dec/idec.c
index eec61cbc..c9c00f62 100644
--- a/src/dec/idec.c
+++ b/src/dec/idec.c
@@ -15,6 +15,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "./alphai.h"
 #include "./webpi.h"
 #include "./vp8i.h"
 #include "../utils/utils.h"
@@ -143,7 +144,22 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
       }
       assert(last_part >= 0);
       dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
-      if (NeedCompressedAlpha(idec)) dec->alpha_data_ += offset;
+      if (NeedCompressedAlpha(idec)) {
+        ALPHDecoder* const alph_dec = dec->alph_dec_;
+        dec->alpha_data_ += offset;
+        if (alph_dec != NULL) {
+          if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
+            VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
+            assert(alph_vp8l_dec != NULL);
+            assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
+            VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
+                                   dec->alpha_data_ + ALPHA_HEADER_LEN,
+                                   dec->alpha_data_size_ - ALPHA_HEADER_LEN);
+          } else {  // alph_dec->method_ == ALPHA_NO_COMPRESSION
+            // Nothing special to do in this case.
+          }
+        }
+      }
     } else {    // Resize lossless bitreader
       VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
       VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
diff --git a/src/dec/vp8.c b/src/dec/vp8.c
index edeb0d2c..c91d2bcd 100644
--- a/src/dec/vp8.c
+++ b/src/dec/vp8.c
@@ -13,6 +13,7 @@
 
 #include <stdlib.h>
 
+#include "./alphai.h"
 #include "./vp8i.h"
 #include "./vp8li.h"
 #include "./webpi.h"
@@ -748,6 +749,8 @@ void VP8Clear(VP8Decoder* const dec) {
   if (dec->use_threads_) {
     WebPWorkerEnd(&dec->worker_);
   }
+  ALPHDelete(dec->alph_dec_);
+  dec->alph_dec_ = NULL;
   free(dec->mem_);
   dec->mem_ = NULL;
   dec->mem_size_ = 0;
diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h
index a28620d1..35ad36f0 100644
--- a/src/dec/vp8i.h
+++ b/src/dec/vp8i.h
@@ -279,12 +279,14 @@ struct VP8Decoder {
   int filter_row_;                           // per-row flag
   VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type
 
-  // extensions
-  const uint8_t* alpha_data_;   // compressed alpha data (if present)
+  // Alpha
+  struct ALPHDecoder* alph_dec_;  // alpha-plane decoder object
+  const uint8_t* alpha_data_;     // compressed alpha data (if present)
   size_t alpha_data_size_;
   int is_alpha_decoded_;  // true if alpha_data_ is decoded in alpha_plane_
   uint8_t* alpha_plane_;        // output. Persistent, contains the whole data.
 
+  // extensions
   int layer_colorspace_;
   const uint8_t* layer_data_;   // compressed layer data (if present)
   size_t layer_data_size_;
diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index cefcf29a..ba6bb5c5 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -14,6 +14,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "./alphai.h"
 #include "./vp8li.h"
 #include "../dsp/lossless.h"
 #include "../dsp/yuv.h"
@@ -1125,52 +1126,72 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
   dec->last_row_ = dec->last_out_row_ = row;
 }
 
-int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
-                               size_t data_size, uint8_t* const output) {
-  VP8Io io;
-  int ok = 0;
-  VP8LDecoder* const dec = VP8LNew();
-  size_t bytes_per_pixel = sizeof(uint32_t);  // Default: BGRA mode.
-  if (dec == NULL) return 0;
+int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output) {
+  VP8LDecoder* dec;
+  VP8Io* io;
+  assert(alph_dec != NULL);
+  alph_dec->vp8l_dec_ = VP8LNew();
+  if (alph_dec->vp8l_dec_ == NULL) return 0;
+  dec = alph_dec->vp8l_dec_;
 
-  dec->width_ = width;
-  dec->height_ = height;
-  dec->io_ = &io;
+  alph_dec->bytes_per_pixel_ = sizeof(uint32_t);  // Default: BGRA mode.
 
-  VP8InitIo(&io);
-  WebPInitCustomIo(NULL, &io);    // Just a sanity Init. io won't be used.
-  io.opaque = output;
-  io.width = width;
-  io.height = height;
+  dec->width_ = alph_dec->width_;
+  dec->height_ = alph_dec->height_;
+  dec->io_ = &alph_dec->io_;
+  io = dec->io_;
+
+  VP8InitIo(io);
+  WebPInitCustomIo(NULL, io);  // Just a sanity Init. io won't be used.
+  io->opaque = output;
+  io->width = alph_dec->width_;
+  io->height = alph_dec->height_;
 
   dec->status_ = VP8_STATUS_OK;
   VP8LInitBitReader(&dec->br_, data, data_size);
 
   dec->action_ = READ_HDR;
-  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
+  if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
+    goto Err;
+  }
 
   // Special case: if alpha data contains only the color indexing transform
   // (a frequent case), we will use DecodeAlphaData() method that only needs
   // allocation of 1 byte per pixel (alpha channel).
   if (dec->next_transform_ == 1 &&
       dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM) {
-    bytes_per_pixel = sizeof(uint8_t);
+    alph_dec->bytes_per_pixel_ = sizeof(uint8_t);
   }
 
   // Allocate internal buffers (note that dec->width_ may have changed here).
-  if (!AllocateInternalBuffers(dec, width, bytes_per_pixel)) goto Err;
+  if (!AllocateInternalBuffers(dec, alph_dec->width_,
+                               alph_dec->bytes_per_pixel_)) {
+    goto Err;
+  }
 
-  // Decode (with special row processing).
   dec->action_ = READ_DATA;
-  ok = (bytes_per_pixel == sizeof(uint8_t)) ?
-      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
-                      dec->height_, ExtractPalettedAlphaRows) :
-      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
-                      dec->height_, ExtractAlphaRows);
+  return 1;
 
  Err:
-  VP8LDelete(dec);
-  return ok;
+  VP8LDelete(alph_dec->vp8l_dec_);
+  alph_dec->vp8l_dec_ = NULL;
+  return 0;
+}
+
+int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
+  VP8LDecoder* const dec = alph_dec->vp8l_dec_;
+  assert(dec != NULL);
+  assert(dec->action_ == READ_DATA);
+  assert(last_row <= dec->height_);
+
+  // Decode (with special row processing).
+  return (alph_dec->bytes_per_pixel_ == sizeof(uint8_t)) ?
+      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
+                      last_row, ExtractPalettedAlphaRows) :
+      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, last_row,
+                      ExtractAlphaRows);
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/dec/vp8li.h b/src/dec/vp8li.h
index 30c8184a..aa132a64 100644
--- a/src/dec/vp8li.h
+++ b/src/dec/vp8li.h
@@ -57,7 +57,8 @@ typedef struct {
   HTreeGroup     *htree_groups_;
 } VP8LMetadata;
 
-typedef struct {
+typedef struct VP8LDecoder VP8LDecoder;
+struct VP8LDecoder {
   VP8StatusCode    status_;
   VP8LDecodeState  action_;
   VP8LDecodeState  state_;
@@ -88,18 +89,27 @@ typedef struct {
 
   uint8_t         *rescaler_memory;  // Working memory for rescaling work.
   WebPRescaler    *rescaler;         // Common rescaler for all channels.
-} VP8LDecoder;
+};
 
 //------------------------------------------------------------------------------
 // internal functions. Not public.
 
+struct ALPHDecoder;  // Defined in dec/alphai.h.
+
 // in vp8l.c
 
-// Decodes a raw image stream (without header) and store the alpha data
-// into *output, which must be of size width x height. Returns false in case
-// of error.
-int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
-                               size_t data_size, uint8_t* const output);
+// Decodes image header for alpha data stored using lossless compression.
+// Returns false in case of error.
+int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output);
+
+// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
+// already decoded in previous call(s), it will resume decoding from where it
+// was paused.
+// Returns false in case of bitstream error.
+int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec,
+                               int last_row);
 
 // Allocates and initialize a new lossless decoder instance.
 VP8LDecoder* VP8LNew(void);