From 85e215d36fe3b1967ccbbac922ab51aace854f4a Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 20 Jul 2012 16:19:12 -0700
Subject: [PATCH 01/42] README/manpages/configure: update website link

code.google was moved to developers.google

Change-Id: I072cab38ccb6f45c3d1d6e533d1626420cdbba56
---
 README        | 2 +-
 configure.ac  | 2 +-
 man/cwebp.1   | 3 ++-
 man/dwebp.1   | 3 ++-
 man/webpmux.1 | 3 ++-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/README b/README
index a30d0090..65a015c8 100644
--- a/README
+++ b/README
@@ -13,7 +13,7 @@ WebP codec: library to encode and decode images in WebP format. This package
 contains the library that can be used in other programs to add WebP support,
 as well as the command line tools 'cwebp' and 'dwebp'.
 
-See http://code.google.com/speed/webp
+See http://developers.google.com/speed/webp
 
 Latest sources are available from http://www.webmproject.org/code/
 
diff --git a/configure.ac b/configure.ac
index 9fc948bf..09559681 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,6 @@
 AC_INIT([libwebp], [0.1.99],
         [http://code.google.com/p/webp/issues],,
-        [http://code.google.com/speed/webp])
+        [http://developers.google.com/speed/webp])
 AC_CANONICAL_TARGET
 AM_INIT_AUTOMAKE([-Wall foreign subdir-objects])
 AC_PROG_LIBTOOL
diff --git a/man/cwebp.1 b/man/cwebp.1
index b4e4c1a4..181c315e 100644
--- a/man/cwebp.1
+++ b/man/cwebp.1
@@ -212,4 +212,5 @@ for the Debian project (and may be used by others).
 .SH SEE ALSO
 .BR dwebp (1).
 .br
-Please refer to http://code.google.com/speed/webp/ for additional information.
+Please refer to http://developers.google.com/speed/webp/ for additional
+information.
diff --git a/man/dwebp.1 b/man/dwebp.1
index 637297f6..b587bcc6 100644
--- a/man/dwebp.1
+++ b/man/dwebp.1
@@ -87,4 +87,5 @@ for the Debian project (and may be used by others).
 .SH SEE ALSO
 .BR cwebp (1).
 .br
-Please refer to http://code.google.com/speed/webp/ for additional information.
+Please refer to http://developers.google.com/speed/webp/ for additional
+information.
diff --git a/man/webpmux.1 b/man/webpmux.1
index 2e44f068..310fbb6d 100644
--- a/man/webpmux.1
+++ b/man/webpmux.1
@@ -131,4 +131,5 @@ for the Debian project (and may be used by others).
 .BR dwebp (1),
 .BR cwebp (1).
 .br
-Please refer to http://code.google.com/speed/webp/ for additional information.
+Please refer to http://developers.google.com/speed/webp/ for additional
+information.

From d919ed06ebe44aa8224a699ddb72041139c9417f Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 20 Jul 2012 16:06:06 -0700
Subject: [PATCH 02/42] dwebp: add PAM output support

PAM output retains the alpha channel rather than stripping it, as PPM does.

ImageMagick's 'display' can render the resulting files.

Change-Id: I4f3a5d332937e0aeaf4e3fbd214fdae3b5382fb8
---
 examples/dwebp.c | 26 +++++++++++++++++++++-----
 man/dwebp.1      |  9 ++++++---
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/examples/dwebp.c b/examples/dwebp.c
index 619331d7..4846b8a3 100644
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -59,6 +59,7 @@ extern void* VP8GetCPUInfo;   // opaque forward declaration.
 // Output types
 typedef enum {
   PNG = 0,
+  PAM,
   PPM,
   PGM,
   ALPHA_PLANE_ONLY  // this is for experimenting only
@@ -201,15 +202,22 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
 }
 #endif
 
-static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer) {
+static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
   const uint32_t width = buffer->width;
   const uint32_t height = buffer->height;
   const unsigned char* const rgb = buffer->u.RGBA.rgba;
   const int stride = buffer->u.RGBA.stride;
+  const size_t bytes_per_px = alpha ? 4 : 3;
   uint32_t y;
-  fprintf(fout, "P6\n%d %d\n255\n", width, height);
+
+  if (alpha) {
+    fprintf(fout, "P7\nWIDTH %d\nHEIGHT %d\nDEPTH 4\nMAXVAL 255\n"
+                  "TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
+  } else {
+    fprintf(fout, "P6\n%d %d\n255\n", width, height);
+  }
   for (y = 0; y < height; ++y) {
-    if (fwrite(rgb + y * stride, width, 3, fout) != 3) {
+    if (fwrite(rgb + y * stride, width, bytes_per_px, fout) != bytes_per_px) {
       return 0;
     }
   }
@@ -289,8 +297,10 @@ static void SaveOutput(const WebPDecBuffer* const buffer,
 #else
     ok &= WritePNG(fout, buffer);
 #endif
+  } else if (format == PAM) {
+    ok &= WritePPM(fout, buffer, 1);
   } else if (format == PPM) {
-    ok &= WritePPM(fout, buffer);
+    ok &= WritePPM(fout, buffer, 0);
   } else if (format == PGM) {
     ok &= WritePGM(fout, buffer);
   } else if (format == ALPHA_PLANE_ONLY) {
@@ -314,7 +324,8 @@ static void Help(void) {
   printf("Usage: dwebp in_file [options] [-o out_file]\n\n"
          "Decodes the WebP image file to PNG format [Default]\n"
          "Use following options to convert into alternate image formats:\n"
-         "  -ppm ......... save the raw RGB samples as color PPM\n"
+         "  -pam ......... save the raw RGBA samples as a color PAM\n"
+         "  -ppm ......... save the raw RGB samples as a color PPM\n"
          "  -pgm ......... save the raw YUV samples as a grayscale PGM\n"
          "                 file with IMC4 layout.\n"
          " Other options are:\n"
@@ -367,6 +378,8 @@ int main(int argc, const char *argv[]) {
       config.options.no_fancy_upsampling = 1;
     } else if (!strcmp(argv[c], "-nofilter")) {
       config.options.bypass_filtering = 1;
+    } else if (!strcmp(argv[c], "-pam")) {
+      format = PAM;
     } else if (!strcmp(argv[c], "-ppm")) {
       format = PPM;
     } else if (!strcmp(argv[c], "-version")) {
@@ -434,6 +447,9 @@ int main(int argc, const char *argv[]) {
         output_buffer->colorspace = bitstream->has_alpha ? MODE_RGBA : MODE_RGB;
 #endif
         break;
+      case PAM:
+        output_buffer->colorspace = MODE_RGBA;
+        break;
       case PPM:
         output_buffer->colorspace = MODE_RGB;  // drops alpha for PPM
         break;
diff --git a/man/dwebp.1 b/man/dwebp.1
index 637297f6..1e8b6c56 100644
--- a/man/dwebp.1
+++ b/man/dwebp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH DWEBP 1 "January 24, 2012"
+.TH DWEBP 1 "July 20, 2012"
 .SH NAME
 dwebp \- decompress a WebP file to an image file
 .SH SYNOPSIS
@@ -11,7 +11,7 @@ This manual page documents the
 .B dwebp
 command.
 .PP
-\fBdwebp\fP decompresses WebP files into PNG, PPM or PGM images.
+\fBdwebp\fP decompresses WebP files into PNG, PAM, PPM or PGM images.
 .SH OPTIONS
 The basic options are:
 .TP
@@ -24,8 +24,11 @@ Print the version number (as major.minor.revision) and exit.
 .B \-o string
 Specify the name of the output file (as PNG format by default).
 .TP
+.B \-pam
+Change the output format to PAM (retains alpha).
+.TP
 .B \-ppm
-Change the output format to PPM.
+Change the output format to PPM (discards alpha).
 .TP
 .B \-pgm
 Change the output format to PGM. The output consist of luma/chroma

From 1889e9b6ccc0265656706cc8c4cf498be80fd32a Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 20 Jul 2012 19:42:56 -0700
Subject: [PATCH 03/42] dwebp: report -alpha option

remove from WEBP_EXPERIMENTAL_FEATURES block; alpha is no longer
experimental.

Change-Id: I57df006ecac8122a987e52084813dc84ca7bcfd6
---
 examples/dwebp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/dwebp.c b/examples/dwebp.c
index 4846b8a3..49ff2fa8 100644
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -335,9 +335,7 @@ static void Help(void) {
          "  -mt .......... use multi-threading\n"
          "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
          "  -scale <w> <h> .......... scale the output (*after* any cropping)\n"
-#ifdef WEBP_EXPERIMENTAL_FEATURES
          "  -alpha ....... only save the alpha plane.\n"
-#endif
          "  -h     ....... this help message.\n"
          "  -v     ....... verbose (e.g. print encoding/decoding times)\n"
 #ifndef WEBP_DLL

From 159b75d31af70923d624acd017aa65da92cd69c5 Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Mon, 23 Jul 2012 12:47:24 +0530
Subject: [PATCH 04/42] cwebp output size consistency:

In the lossless case too, cwebp should report the full file size.
Fixes this issue: http://code.google.com/p/webp/issues/detail?id=126

Change-Id: I96e2bf09e6c9470a0267f5eea911d9b40d1addb3
---
 src/enc/vp8l.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 68fb5f17..09e26ec3 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -754,7 +754,7 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic,
       goto Error;
     }
   }
-  *coded_size = vp8l_size;
+  *coded_size = CHUNK_HEADER_SIZE + riff_size;
   return VP8_ENC_OK;
 
  Error:

From 5955cf5e89e8bae012e5e534950a46db17f66a79 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Mon, 23 Jul 2012 14:21:30 -0700
Subject: [PATCH 05/42] replace x*155/100 by x*101581>>16

Don't expect a visible speed difference; it's just cool.
(And that's one less TODO in the code.)

Change-Id: Iaeb2f1c930debb51501e170ee806f2f945fb1a8d
---
 src/dec/quant.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/dec/quant.c b/src/dec/quant.c
index 95514114..d54097af 100644
--- a/src/dec/quant.c
+++ b/src/dec/quant.c
@@ -94,8 +94,10 @@ void VP8ParseQuant(VP8Decoder* const dec) {
       m->y1_mat_[1] = kAcTable[clip(q + 0,       127)];
 
       m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
-      // TODO(skal): make it another table?
-      m->y2_mat_[1] = kAcTable[clip(q + dqy2_ac, 127)] * 155 / 100;
+      // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+      // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+      // word size.
+      m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
       if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
 
       m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];

From d39177b74cd0a84eea7a8de3c6f625d2a670f6d8 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Mon, 23 Jul 2012 14:26:56 -0700
Subject: [PATCH 06/42] make QuantizeLevels() store the sum of squared error

(instead of MSE).
Useful for directly storing the alpha-PSNR (in another patch)

Change-Id: I4072864f9c53eb4f38366e8025a2816eb14f504e
---
 src/utils/quant_levels.c | 27 +++++++++------------------
 src/utils/quant_levels.h |  8 ++++----
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/src/utils/quant_levels.c b/src/utils/quant_levels.c
index ba779a0b..f6884392 100644
--- a/src/utils/quant_levels.c
+++ b/src/utils/quant_levels.c
@@ -11,7 +11,6 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <assert.h>
-#include <math.h>    // for sqrt()
 
 #include "./quant_levels.h"
 
@@ -27,8 +26,8 @@ extern "C" {
 // -----------------------------------------------------------------------------
 // Quantize levels.
 
-int QuantizeLevels(uint8_t* data, int width, int height,
-                   int num_levels, float* mse) {
+int QuantizeLevels(uint8_t* const data, int width, int height,
+                   int num_levels, uint64_t* const sse) {
   int freq[NUM_SYMBOLS] = { 0 };
   int q_level[NUM_SYMBOLS] = { 0 };
   double inv_q_level[NUM_SYMBOLS] = { 0 };
@@ -36,6 +35,7 @@ int QuantizeLevels(uint8_t* data, int width, int height,
   const size_t data_size = height * width;
   int i, num_levels_in, iter;
   double last_err = 1.e38, err = 0.;
+  const double err_threshold = ERROR_THRESHOLD * data_size;
 
   if (data == NULL) {
     return 0;
@@ -60,10 +60,7 @@ int QuantizeLevels(uint8_t* data, int width, int height,
     }
   }
 
-  if (num_levels_in <= num_levels) {
-    if (mse) *mse = 0.;
-    return 1;   // nothing to do !
-  }
+  if (num_levels_in <= num_levels) goto End;  // nothing to do!
 
   // Start with uniformly spread centroids.
   for (i = 0; i < num_levels; ++i) {
@@ -78,7 +75,6 @@ int QuantizeLevels(uint8_t* data, int width, int height,
 
   // k-Means iterations.
   for (iter = 0; iter < MAX_ITER; ++iter) {
-    double err_count;
     double q_sum[NUM_SYMBOLS] = { 0 };
     double q_count[NUM_SYMBOLS] = { 0 };
     int s, slot = 0;
@@ -109,17 +105,14 @@ int QuantizeLevels(uint8_t* data, int width, int height,
 
     // Compute convergence error.
     err = 0.;
-    err_count = 0.;
     for (s = min_s; s <= max_s; ++s) {
       const double error = s - inv_q_level[q_level[s]];
       err += freq[s] * error * error;
-      err_count += freq[s];
     }
-    if (err_count > 0.) err /= err_count;
 
     // Check for convergence: we stop as soon as the error is no
     // longer improving.
-    if (last_err - err < ERROR_THRESHOLD) break;
+    if (last_err - err < err_threshold) break;
     last_err = err;
   }
 
@@ -140,16 +133,14 @@ int QuantizeLevels(uint8_t* data, int width, int height,
       data[n] = map[data[n]];
     }
   }
-
-  // Compute final mean squared error if needed.
-  if (mse != NULL) {
-    *mse = (float)sqrt(err);
-  }
+ End:
+  // Store sum of squared error if needed.
+  if (sse != NULL) *sse = (uint64_t)err;
 
   return 1;
 }
 
-int DequantizeLevels(uint8_t* data, int width, int height) {
+int DequantizeLevels(uint8_t* const data, int width, int height) {
   if (data == NULL || width <= 0 || height <= 0) return 0;
   // TODO(skal): implement gradient smoothing.
   (void)data;
diff --git a/src/utils/quant_levels.h b/src/utils/quant_levels.h
index d1075e62..89ccafe4 100644
--- a/src/utils/quant_levels.h
+++ b/src/utils/quant_levels.h
@@ -21,16 +21,16 @@ extern "C" {
 #endif
 
 // Replace the input 'data' of size 'width'x'height' with 'num-levels'
-// quantized values. If not NULL, 'mse' will contain the mean-squared error.
+// quantized values. If not NULL, 'sse' will contain the sum of squared error.
 // Valid range for 'num_levels' is [2, 256].
 // Returns false in case of error (data is NULL, or parameters are invalid).
-int QuantizeLevels(uint8_t* data, int width, int height, int num_levels,
-                   float* mse);
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
+                   uint64_t* const sse);
 
 // Apply post-processing to input 'data' of size 'width'x'height' assuming
 // that the source was quantized to a reduced number of levels.
 // Returns false in case of error (data is NULL, invalid parameters, ...).
-int DequantizeLevels(uint8_t* data, int width, int height);
+int DequantizeLevels(uint8_t* const data, int width, int height);
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"

From 7d853d79dc23f50118db9c2832f94b9fb0ae15c4 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Tue, 24 Jul 2012 16:15:36 -0700
Subject: [PATCH 07/42] add stats for lossless

* Extend AuxStats with new fields.
  It's slightly ABI-incompatible, but I guess that's OK for 0.1.99+.
  I expect to add more stats later, possibly (predictor stats, etc.).
* Have cwebp report the features used by lossless
  compression (either for alpha or full lossless coding).
* Print the PSNR for alpha (useful in case of -alpha_q).
* Clean up alpha.c signatures.
* Misc cleanup (added const '* const ptr', etc.).

Change-Id: I157a21581f1793cb0c6cc0882e7b0a2dde68a970
---
 examples/cwebp.c  |  37 ++++++++++++---
 src/enc/alpha.c   | 117 +++++++++++++++++++++++++---------------------
 src/enc/vp8enci.h |   8 ++--
 src/enc/vp8l.c    |  41 ++++++++++++----
 src/enc/vp8li.h   |   1 +
 src/enc/webpenc.c |   1 +
 src/webp/encode.h |  13 +++++-
 7 files changed, 143 insertions(+), 75 deletions(-)

diff --git a/examples/cwebp.c b/examples/cwebp.c
index 86f5b7ae..9f933cff 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -614,6 +614,25 @@ static void PrintValues(const int values[4]) {
   fprintf(stderr, "|\n");
 }
 
+static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
+                                  const char* const description) {
+  fprintf(stderr, "Lossless-%s compressed size: %d bytes\n",
+          description, stats->lossless_size);
+  if (stats->lossless_features) {
+    fprintf(stderr, "  * Lossless features used:");
+    if (stats->lossless_features & 1) fprintf(stderr, " PREDICTION");
+    if (stats->lossless_features & 2) fprintf(stderr, " CROSS-COLOR-TRANSFORM");
+    if (stats->lossless_features & 4) fprintf(stderr, " SUBTRACT-GREEN");
+    if (stats->lossless_features & 8) fprintf(stderr, " PALETTE");
+    fprintf(stderr, "\n");
+  }
+  fprintf(stderr, "  * Precision Bits: histogram=%d transform=%d cache=%d\n",
+          stats->histogram_bits, stats->transform_bits, stats->cache_bits);
+  if (stats->palette_size > 0) {
+    fprintf(stderr, "  * Palette size:   %d\n", stats->palette_size);
+  }
+}
+
 static void PrintExtraInfoLossless(const WebPPicture* const pic,
                                    int short_output,
                                    const char* const file_name) {
@@ -624,6 +643,7 @@ static void PrintExtraInfoLossless(const WebPPicture* const pic,
     fprintf(stderr, "File:      %s\n", file_name);
     fprintf(stderr, "Dimension: %d x %d\n", pic->width, pic->height);
     fprintf(stderr, "Output:    %d bytes\n", stats->coded_size);
+    PrintFullLosslessInfo(stats, "ARGB");
   }
 }
 
@@ -658,9 +678,9 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
               100.f * stats->header_bytes[0] / stats->coded_size,
               stats->header_bytes[1],
               100.f * stats->header_bytes[1] / stats->coded_size);
-      if (stats->alpha_data_size) {
-        fprintf(stderr, "             transparency:   %6d\n",
-                stats->alpha_data_size);
+      if (stats->alpha_data_size > 0) {
+        fprintf(stderr, "             transparency:   %6d (%.1f dB)\n",
+                stats->alpha_data_size, stats->PSNR[4]);
       }
       if (stats->layer_data_size) {
         fprintf(stderr, "             enhancement:    %6d\n",
@@ -686,8 +706,11 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
       fprintf(stderr, " segments total:  ");
       PrintByteCount(totals, stats->coded_size, NULL);
     }
+    if (stats->lossless_size > 0) {
+      PrintFullLosslessInfo(stats, "alpha");
+    }
   }
-  if (pic->extra_info) {
+  if (pic->extra_info != NULL) {
     const int mb_w = (pic->width + 15) / 16;
     const int mb_h = (pic->height + 15) / 16;
     const int type = pic->extra_info_type;
@@ -1100,8 +1123,10 @@ int main(int argc, const char *argv[]) {
       fprintf(stderr, "be performed, but its results discarded.\n\n");
     }
   }
-  picture.stats = &stats;
-  stats.user_data = (void*)in_file;
+  if (!quiet) {
+    picture.stats = &stats;
+    stats.user_data = (void*)in_file;
+  }
 
   // Compress
   if (verbose) {
diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index b31cee8c..62d5c8fa 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -22,19 +22,15 @@ extern "C" {
 #endif
 
 // -----------------------------------------------------------------------------
-// int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
-//                 int quality, int method, int filter, int effort_level,
-//                 uint8_t** output, size_t* output_size)
-//
-// Encodes the given alpha data 'data' of size 'stride'x'height' via specified
-// compression method 'method'. The pre-processing (Quantization) is
-// performed if 'quality' is less than 100. For such cases, the encoding is
-// lossy. Valid ranges for 'quality' is [0, 100] and 'method' is [0, 1]:
+// Encodes the given alpha data via specified compression method 'method'.
+// The pre-processing (quantization) is performed if 'quality' is less than 100.
+// For such cases, the encoding is lossy. The valid range is [0, 100] for
+// 'quality' and [0, 1] for 'method':
 //   'method = 0' - No compression;
 //   'method = 1' - Use lossless coder on the alpha plane only
 // 'filter' values [0, 4] correspond to prediction modes none, horizontal,
 // vertical & gradient filters. The prediction mode 4 will try all the
-// prediction modes (0 to 3) and pick the best prediction mode.
+// prediction modes 0 to 3 and pick the best one.
 // 'effort_level': specifies how much effort must be spent to try and reduce
 //  the compressed output size. In range 0 (quick) to 6 (slow).
 //
@@ -50,10 +46,10 @@ extern "C" {
 
 #include "../enc/vp8li.h"
 
-static int EncodeLossless(const uint8_t* data, int width, int height,
+static int EncodeLossless(const uint8_t* const data, int width, int height,
                           int effort_level,  // in [0..6] range
-                          VP8BitWriter* const bw) {
-
+                          VP8BitWriter* const bw,
+                          WebPAuxStats* const stats) {
   int ok = 0;
   WebPConfig config;
   WebPPicture picture;
@@ -63,6 +59,7 @@ static int EncodeLossless(const uint8_t* data, int width, int height,
   picture.width = width;
   picture.height = height;
   picture.use_argb = 1;
+  picture.stats = stats;
   if (!WebPPictureAlloc(&picture)) return 0;
 
   // Transfer the alpha values to the green channel.
@@ -101,10 +98,12 @@ static int EncodeLossless(const uint8_t* data, int width, int height,
 
 // -----------------------------------------------------------------------------
 
-static int EncodeAlphaInternal(const uint8_t* data, int width, int height,
+static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
                                int method, int filter, int reduce_levels,
                                int effort_level,  // in [0..6] range
-                               uint8_t* tmp_alpha, VP8BitWriter* const bw) {
+                               uint8_t* const tmp_alpha,
+                               VP8BitWriter* const bw,
+                               WebPAuxStats* const stats) {
   int ok = 0;
   const uint8_t* alpha_src;
   WebPFilterFunc filter_func;
@@ -139,7 +138,7 @@ static int EncodeAlphaInternal(const uint8_t* data, int width, int height,
     ok = VP8BitWriterAppend(bw, alpha_src, width * height);
     ok = ok && !bw->error_;
   } else {
-    ok = EncodeLossless(alpha_src, width, height, effort_level, bw);
+    ok = EncodeLossless(alpha_src, width, height, effort_level, bw, stats);
     VP8BitWriterFinish(bw);
   }
   return ok;
@@ -157,19 +156,25 @@ static void CopyPlane(const uint8_t* src, int src_stride,
   }
 }
 
-static int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
+static int EncodeAlpha(VP8Encoder* const enc,
                        int quality, int method, int filter,
                        int effort_level,
-                       uint8_t** output, size_t* output_size) {
+                       uint8_t** const output, size_t* const output_size) {
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+
   uint8_t* quant_alpha = NULL;
   const size_t data_size = width * height;
+  uint64_t sse = 0;
   int ok = 1;
   const int reduce_levels = (quality < 100);
 
   // quick sanity checks
-  assert(data != NULL && output != NULL && output_size != NULL);
+  assert(enc != NULL && pic != NULL && pic->a != NULL);
+  assert(output != NULL && output_size != NULL);
   assert(width > 0 && height > 0);
-  assert(stride >= width);
+  assert(pic->a_stride >= width);
   assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
 
   if (quality < 0 || quality > 100) {
@@ -186,7 +191,7 @@ static int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
   }
 
   // Extract alpha data (width x height) from raw_data (stride x height).
-  CopyPlane(data, stride, quant_alpha, width, width, height);
+  CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
 
   if (reduce_levels) {  // No Quantization required for 'quality = 100'.
     // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
@@ -194,24 +199,22 @@ static int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
     // and Quality:]70, 100] -> Levels:]16, 256].
     const int alpha_levels = (quality <= 70) ? (2 + quality / 5)
                                              : (16 + (quality - 70) * 8);
-    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, NULL);
+    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse);
   }
 
   if (ok) {
     VP8BitWriter bw;
-    size_t best_score;
     int test_filter;
     uint8_t* filtered_alpha = NULL;
 
     // We always test WEBP_FILTER_NONE first.
     ok = EncodeAlphaInternal(quant_alpha, width, height,
                              method, WEBP_FILTER_NONE, reduce_levels,
-                             effort_level, NULL, &bw);
+                             effort_level, NULL, &bw, pic->stats);
     if (!ok) {
       VP8BitWriterWipeOut(&bw);
       goto End;
     }
-    best_score = VP8BitWriterSize(&bw);
 
     if (filter == WEBP_FILTER_FAST) {  // Quick estimate of a second candidate?
       filter = EstimateBestFilter(quant_alpha, width, height, width);
@@ -228,35 +231,46 @@ static int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
     }
 
     // Try the other mode(s).
-    for (test_filter = WEBP_FILTER_HORIZONTAL;
-         ok && (test_filter <= WEBP_FILTER_GRADIENT);
-         ++test_filter) {
-      VP8BitWriter tmp_bw;
-      if (filter != WEBP_FILTER_BEST && test_filter != filter) {
-        continue;
-      }
-
-      ok = EncodeAlphaInternal(quant_alpha, width, height,
-                               method, test_filter, reduce_levels,
-                               effort_level, filtered_alpha, &tmp_bw);
-      if (ok) {
-        const size_t score = VP8BitWriterSize(&tmp_bw);
-        if (score < best_score) {
-          // swap bitwriter objects.
-          VP8BitWriter tmp = tmp_bw;
-          tmp_bw = bw;
-          bw = tmp;
-          best_score = score;
+    {
+      WebPAuxStats best_stats;
+      size_t best_score = VP8BitWriterSize(&bw);
+      if (pic->stats != NULL) best_stats = *pic->stats;
+      for (test_filter = WEBP_FILTER_HORIZONTAL;
+           ok && (test_filter <= WEBP_FILTER_GRADIENT);
+           ++test_filter) {
+        VP8BitWriter tmp_bw;
+        if (filter != WEBP_FILTER_BEST && test_filter != filter) {
+          continue;
         }
-      } else {
-        VP8BitWriterWipeOut(&bw);
+        ok = EncodeAlphaInternal(quant_alpha, width, height,
+                                 method, test_filter, reduce_levels,
+                                 effort_level, filtered_alpha, &tmp_bw,
+                                 pic->stats);
+        if (ok) {
+          const size_t score = VP8BitWriterSize(&tmp_bw);
+          if (score < best_score) {
+            // swap bitwriter objects.
+            VP8BitWriter tmp = tmp_bw;
+            tmp_bw = bw;
+            bw = tmp;
+            best_score = score;
+            if (pic->stats != NULL) best_stats = *pic->stats;
+          }
+        } else {
+          VP8BitWriterWipeOut(&bw);
+        }
+        VP8BitWriterWipeOut(&tmp_bw);
       }
-      VP8BitWriterWipeOut(&tmp_bw);
+      if (pic->stats != NULL) *pic->stats = best_stats;
     }
  Ok:
     if (ok) {
       *output_size = VP8BitWriterSize(&bw);
       *output = VP8BitWriterBuf(&bw);
+      if (pic->stats != NULL) {         // need stats?
+        pic->stats->coded_size += *output_size;
+        enc->sse_[3] = sse;
+      }
     }
     free(filtered_alpha);
   }
@@ -269,16 +283,15 @@ static int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
 //------------------------------------------------------------------------------
 // Main calls
 
-void VP8EncInitAlpha(VP8Encoder* enc) {
+void VP8EncInitAlpha(VP8Encoder* const enc) {
   enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
 }
 
-int VP8EncFinishAlpha(VP8Encoder* enc) {
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
   if (enc->has_alpha_) {
     const WebPConfig* config = enc->config_;
-    const WebPPicture* pic = enc->pic_;
     uint8_t* tmp_data = NULL;
     size_t tmp_size = 0;
     const int effort_level = config->method;  // maps to [0..6]
@@ -287,9 +300,7 @@ int VP8EncFinishAlpha(VP8Encoder* enc) {
         (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
                                          WEBP_FILTER_BEST;
 
-    assert(pic->a);
-    if (!EncodeAlpha(pic->a, pic->width, pic->height, pic->a_stride,
-                     config->alpha_quality, config->alpha_compression,
+    if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
                      filter, effort_level, &tmp_data, &tmp_size)) {
       return 0;
     }
@@ -303,7 +314,7 @@ int VP8EncFinishAlpha(VP8Encoder* enc) {
   return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
 }
 
-void VP8EncDeleteAlpha(VP8Encoder* enc) {
+void VP8EncDeleteAlpha(VP8Encoder* const enc) {
   free(enc->alpha_data_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index 17ae2621..56f2f11b 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -402,7 +402,7 @@ struct VP8Encoder {
 
   // probabilities and statistics
   VP8Proba proba_;
-  uint64_t sse_[3];        // sum of Y/U/V squared errors for all macroblocks
+  uint64_t sse_[4];        // sum of Y/U/V/A squared errors for all macroblocks
   uint64_t sse_count_;     // pixel count for the sse_[] stats
   int      coded_size_;
   int      residual_bytes_[3][4];
@@ -488,9 +488,9 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
 
   // in alpha.c
-void VP8EncInitAlpha(VP8Encoder* enc);           // initialize alpha compression
-int VP8EncFinishAlpha(VP8Encoder* enc);          // finalize compressed data
-void VP8EncDeleteAlpha(VP8Encoder* enc);         // delete compressed data
+void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
+void VP8EncDeleteAlpha(VP8Encoder* const enc);  // delete compressed data
 
   // in layer.c
 void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 09e26ec3..afe810ed 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -610,7 +610,7 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
 
 // Check if it would be a good idea to subtract green from red and blue. We
 // only impact entropy in red/blue components, don't bother to look at others.
-static int EvalAndApplySubtractGreen(const VP8LEncoder* const enc,
+static int EvalAndApplySubtractGreen(VP8LEncoder* const enc,
                                      int width, int height,
                                      VP8LBitWriter* const bw) {
   if (!enc->use_palette_) {
@@ -639,7 +639,8 @@ static int EvalAndApplySubtractGreen(const VP8LEncoder* const enc,
     free(histo);
 
     // Check if subtracting green yields low entropy.
-    if (bit_cost_after < bit_cost_before) {
+    enc->use_subtract_green_ = (bit_cost_after < bit_cost_before);
+    if (enc->use_subtract_green_) {
       VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
       VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
       VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
@@ -938,6 +939,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
   const int width = picture->width;
   const int height = picture->height;
   VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
+  const size_t byte_position = VP8LBitWriterNumBytes(bw);
 
   if (enc == NULL) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
@@ -1017,6 +1019,20 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
     goto Error;
   }
 
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    stats->lossless_features = 0;
+    if (enc->use_predict_) stats->lossless_features |= 1;
+    if (enc->use_cross_color_) stats->lossless_features |= 2;
+    if (enc->use_subtract_green_) stats->lossless_features |= 4;
+    if (enc->use_palette_) stats->lossless_features |= 8;
+    stats->histogram_bits = enc->histo_bits_;
+    stats->transform_bits = enc->transform_bits_;
+    stats->cache_bits = enc->cache_bits_;
+    stats->palette_size = enc->palette_size_;
+    stats->lossless_size = VP8LBitWriterNumBytes(bw) - byte_position;
+  }
+
  Error:
   VP8LEncoderDelete(enc);
   return err;
@@ -1045,6 +1061,16 @@ int VP8LEncodeImage(const WebPConfig* const config,
     err = VP8_ENC_ERROR_USER_ABORT;
     goto Error;
   }
+  // Reset stats (for pure lossless coding)
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    memset(stats, 0, sizeof(*stats));
+    stats->PSNR[0] = 99.;
+    stats->PSNR[1] = 99.;
+    stats->PSNR[2] = 99.;
+    stats->PSNR[3] = 99.;
+    stats->PSNR[4] = 99.;
+  }
 
   // Write image size.
   VP8LBitWriterInit(&bw, (width * height) >> 1);
@@ -1075,15 +1101,10 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
   if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
 
-  // Collect some stats if needed.
+  // Save size.
   if (picture->stats != NULL) {
-    WebPAuxStats* const stats = picture->stats;
-    memset(stats, 0, sizeof(*stats));
-    stats->PSNR[0] = 99.;
-    stats->PSNR[1] = 99.;
-    stats->PSNR[2] = 99.;
-    stats->PSNR[3] = 99.;
-    stats->coded_size = (int)coded_size;
+    picture->stats->coded_size += (int)coded_size;
+    picture->stats->lossless_size = (int)coded_size;
   }
 
   if (picture->extra_info != NULL) {
diff --git a/src/enc/vp8li.h b/src/enc/vp8li.h
index 083ff595..eae90dd6 100644
--- a/src/enc/vp8li.h
+++ b/src/enc/vp8li.h
@@ -38,6 +38,7 @@ typedef struct {
 
   // Encoding parameters derived from image characteristics.
   int use_cross_color_;
+  int use_subtract_green_;
   int use_predict_;
   int use_palette_;
   int palette_size_;
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index 99ab170b..a00ac640 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -284,6 +284,7 @@ static void FinalizePSNR(const VP8Encoder* const enc) {
   stats->PSNR[1] = (float)GetPSNR(sse[1], size / 4);
   stats->PSNR[2] = (float)GetPSNR(sse[2], size / 4);
   stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2);
+  stats->PSNR[4] = (float)GetPSNR(sse[3], size);
 }
 
 static void StoreStats(VP8Encoder* const enc) {
diff --git a/src/webp/encode.h b/src/webp/encode.h
index d2857659..1ee42819 100644
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@@ -157,7 +157,7 @@ typedef struct WebPPicture WebPPicture;   // main structure for I/O
 typedef struct {
   int coded_size;         // final size
 
-  float PSNR[4];          // peak-signal-to-noise ratio for Y/U/V/All
+  float PSNR[5];          // peak-signal-to-noise ratio for Y/U/V/All/Alpha
   int block_count[3];     // number of intra4/intra16/skipped macroblocks
   int header_bytes[2];    // approximate number of bytes spent for header
                           // and mode-partition #0
@@ -173,7 +173,16 @@ typedef struct {
   void* user_data;        // this field is free to be set to any value and
                           // used during callbacks (like progress-report e.g.).
 
-  uint32_t pad[6];        // padding for later use
+  // lossless encoder statistics
+  uint32_t lossless_features;  // bit0:predictor bit1:cross-color transform
+                               // bit2:subtract-green bit3:color indexing
+  int histogram_bits;          // number of precision bits of histogram
+  int transform_bits;          // precision bits for transform
+  int cache_bits;              // number of bits for color cache lookup
+  int palette_size;            // number of colors in palette, if used
+  int lossless_size;           // final lossless size
+
+  uint32_t pad[4];        // padding for later use
 } WebPAuxStats;
 
 // Signature for output function. Should return true if writing was successful.

From a7cc729126d2ffb1936d81b4a583357c7270070f Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 25 Jul 2012 14:18:21 -0700
Subject: [PATCH 08/42] fix type and conversion warnings

avoids warning messages on MSVC mainly

Change-Id: I80f281d5263a54c6a224bb095175497cf2f4ce1e
---
 src/enc/alpha.c |  2 +-
 src/enc/vp8l.c  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index 62d5c8fa..97538da9 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -268,7 +268,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
       *output_size = VP8BitWriterSize(&bw);
       *output = VP8BitWriterBuf(&bw);
       if (pic->stats != NULL) {         // need stats?
-        pic->stats->coded_size += *output_size;
+        pic->stats->coded_size += (int)(*output_size);
         enc->sse_[3] = sse;
       }
     }
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index afe810ed..82bef500 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -1030,7 +1030,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
     stats->transform_bits = enc->transform_bits_;
     stats->cache_bits = enc->cache_bits_;
     stats->palette_size = enc->palette_size_;
-    stats->lossless_size = VP8LBitWriterNumBytes(bw) - byte_position;
+    stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
   }
 
  Error:
@@ -1065,11 +1065,11 @@ int VP8LEncodeImage(const WebPConfig* const config,
   if (picture->stats != NULL) {
     WebPAuxStats* const stats = picture->stats;
     memset(stats, 0, sizeof(*stats));
-    stats->PSNR[0] = 99.;
-    stats->PSNR[1] = 99.;
-    stats->PSNR[2] = 99.;
-    stats->PSNR[3] = 99.;
-    stats->PSNR[4] = 99.;
+    stats->PSNR[0] = 99.f;
+    stats->PSNR[1] = 99.f;
+    stats->PSNR[2] = 99.f;
+    stats->PSNR[3] = 99.f;
+    stats->PSNR[4] = 99.f;
   }
 
   // Write image size.

From 475d87d767faae597c8ce4410a63b739d7a493c2 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 27 Jul 2012 19:53:16 -0700
Subject: [PATCH 09/42] WebPEncode: clear stats at the start of encode

also relocate user_data from WebPAuxStats to the WebPPicture struct to
make clearing easier while placing it closer to the progress hook with
which it's used.
prior to this change some spurious lossless data could be reported in
the lossy (sans alpha) encoding case. additionally user_data could be
lost during lossless encoding.

Change-Id: I929fae3dfde4d445ff81bbaad51445ea586dd80b
---
 examples/cwebp.c  | 4 ++--
 src/enc/webpenc.c | 2 ++
 src/webp/encode.h | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/cwebp.c b/examples/cwebp.c
index 9f933cff..c1a5b13c 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -779,7 +779,7 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
 
 static int ProgressReport(int percent, const WebPPicture* const picture) {
   printf("[%s]: %3d %%      \r",
-         (char*)picture->stats->user_data, percent);
+         (char*)picture->user_data, percent);
   fflush(stdout);
   return 1;  // all ok
 }
@@ -1125,7 +1125,7 @@ int main(int argc, const char *argv[]) {
   }
   if (!quiet) {
     picture.stats = &stats;
-    stats.user_data = (void*)in_file;
+    picture.user_data = (void*)in_file;
   }
 
   // Compress
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index a00ac640..8649ce2d 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -344,6 +344,8 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
   if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION)
     return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
 
+  if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats));
+
   if (!config->lossless) {
     VP8Encoder* enc = NULL;
     if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
diff --git a/src/webp/encode.h b/src/webp/encode.h
index 1ee42819..18e433d4 100644
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@@ -170,9 +170,6 @@ typedef struct {
   int alpha_data_size;    // size of the transparency data
   int layer_data_size;    // size of the enhancement layer data
 
-  void* user_data;        // this field is free to be set to any value and
-                          // used during callbacks (like progress-report e.g.).
-
   // lossless encoder statistics
   uint32_t lossless_features;  // bit0:predictor bit1:cross-color transform
                                // bit2:subtract-green bit3:color indexing
@@ -299,6 +296,9 @@ struct WebPPicture {
   // If not NULL, report progress during encoding.
   WebPProgressHook progress_hook;
 
+  void* user_data;        // this field is free to be set to any value and
+                          // used during callbacks (like progress-report e.g.).
+
   uint32_t pad3[3];       // padding for later use
 
   // Unused for now: original samples (for non-YUV420 modes)

From 256afefa50174722512464e2c1aecb3ed650c12d Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 27 Jul 2012 18:56:55 -0700
Subject: [PATCH 10/42] cwebp: exit immediately on version mismatch

{Picture,Config}Init don't do allocations and attempting to free any of
their contents on version mismatch will likely cause a crash

Change-Id: I2a5aece235f9680fb406aec4799adceea7f62cfc
---
 examples/cwebp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cwebp.c b/examples/cwebp.c
index 9f933cff..2ca8d7e2 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -909,7 +909,7 @@ int main(int argc, const char *argv[]) {
       !WebPPictureInit(&original_picture) ||
       !WebPConfigInit(&config)) {
     fprintf(stderr, "Error! Version mismatch!\n");
-    goto Error;
+    return -1;
   }
 
   if (argc == 1) {

From cbfa9eecf497417ebe9a828b39ded310e0e65684 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 31 Jul 2012 11:59:54 -0700
Subject: [PATCH 11/42] lossless: fix crash on user abort

avoid free on uninitialized bit writer buffer

Change-Id: I1a41b2cea421bf5a2ea0af33c6e84018cb997caf
---
 src/enc/vp8l.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 82bef500..362b767b 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -1051,11 +1051,14 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
   if (config == NULL || picture->argb == NULL) {
     err = VP8_ENC_ERROR_NULL_PARAMETER;
-    goto Error;
+    WebPEncodingSetError(picture, err);
+    return 0;
   }
 
   width = picture->width;
   height = picture->height;
+  VP8LBitWriterInit(&bw, (width * height) >> 1);
+
   if (!WebPReportProgress(picture, 1, &percent)) {
  UserAbort:
     err = VP8_ENC_ERROR_USER_ABORT;
@@ -1073,7 +1076,6 @@ int VP8LEncodeImage(const WebPConfig* const config,
   }
 
   // Write image size.
-  VP8LBitWriterInit(&bw, (width * height) >> 1);
   if (!WriteImageSize(picture, &bw)) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;

From 183cba83a7977b835be66f9c5498af704e14e449 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 31 Jul 2012 12:11:40 -0700
Subject: [PATCH 12/42] check VP8LBitWriterInit return

Change-Id: I460906281598f5792bd75a25b14b449c8daaff8c
---
 src/enc/alpha.c | 4 ++--
 src/enc/vp8l.c  | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index 97538da9..51a8cd10 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -84,8 +84,8 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
   config.quality = 10.f + 15.f * effort_level;
   if (config.quality > 100.f) config.quality = 100.f;
 
-  VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
-  ok = (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
+  ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
+  ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
   WebPPictureFree(&picture);
   if (ok) {
     const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 362b767b..2d82ce26 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -1057,7 +1057,10 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
   width = picture->width;
   height = picture->height;
-  VP8LBitWriterInit(&bw, (width * height) >> 1);
+  if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
 
   if (!WebPReportProgress(picture, 1, &percent)) {
  UserAbort:

From 80cc7303abce069415b039746c23a7750b4d6790 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Tue, 31 Jul 2012 16:56:39 -0700
Subject: [PATCH 13/42] WebPSafeMalloc() and WebPSafeCalloc():

safe size-checking versions of malloc() and calloc()

Change-Id: Iffa3138c48b9b254b3d7eaad913e1f852d9dafba
---
 Android.mk            |  1 +
 Makefile.vc           |  1 +
 makefile.unix         |  1 +
 src/utils/Makefile.am |  2 ++
 src/utils/utils.c     | 44 +++++++++++++++++++++++++++++++++++++++++++
 src/utils/utils.h     | 44 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 93 insertions(+)
 create mode 100644 src/utils/utils.c
 create mode 100644 src/utils/utils.h

diff --git a/Android.mk b/Android.mk
index 6931f9d5..8daa508d 100644
--- a/Android.mk
+++ b/Android.mk
@@ -47,6 +47,7 @@ LOCAL_SRC_FILES := \
     src/utils/quant_levels.c \
     src/utils/rescaler.c \
     src/utils/thread.c \
+    src/utils/utils.c \
 
 LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD \
                 -DNOT_HAVE_LOG2 -DWEBP_USE_THREAD \
diff --git a/Makefile.vc b/Makefile.vc
index a0a495c4..c69b62dc 100644
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -199,6 +199,7 @@ UTILS_OBJS = \
     $(DIROBJ)\utils\quant_levels.obj \
     $(DIROBJ)\utils\rescaler.obj \
     $(DIROBJ)\utils\thread.obj \
+    $(DIROBJ)\utils\utils.obj \
 
 LIBWEBP_OBJS = $(DEC_OBJS) $(DSP_OBJS) $(ENC_OBJS) $(UTILS_OBJS) $(LIBWEBP_OBJS)
 LIBWEBPMUX_OBJS = $(MUX_OBJS) $(LIBWEBPMUX_OBJS)
diff --git a/makefile.unix b/makefile.unix
index 473b90af..85b21073 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -130,6 +130,7 @@ UTILS_OBJS = \
     src/utils/quant_levels.o \
     src/utils/rescaler.o \
     src/utils/thread.o \
+    src/utils/utils.o \
 
 LIBWEBP_OBJS = $(DEC_OBJS) $(DSP_OBJS) $(ENC_OBJS) $(UTILS_OBJS)
 LIBWEBPMUX_OBJS = $(MUX_OBJS)
diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am
index 96b2bd45..65054c03 100644
--- a/src/utils/Makefile.am
+++ b/src/utils/Makefile.am
@@ -20,6 +20,8 @@ libwebputils_la_SOURCES += rescaler.c
 libwebputils_la_SOURCES += rescaler.h
 libwebputils_la_SOURCES += thread.c
 libwebputils_la_SOURCES += thread.h
+libwebputils_la_SOURCES += utils.c
+libwebputils_la_SOURCES += utils.h
 
 libwebputilsinclude_HEADERS = ../webp/types.h
 libwebputilsincludedir = $(includedir)/webp
diff --git a/src/utils/utils.c b/src/utils/utils.c
new file mode 100644
index 00000000..673b7e28
--- /dev/null
+++ b/src/utils/utils.c
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Checked memory allocation
+
+static int CheckSizeArguments(uint64_t nmemb, size_t size) {  // 1=ok, 0=reject
+  const uint64_t total_size = nmemb * size;  // may wrap; validated below
+  if (nmemb == 0) return 1;  // nothing to check; avoids a divide by 0 below
+  if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;  // too big
+  if (total_size != (size_t)total_size) return 0;  // exceeds size_t range
+  return 1;
+}
+
+void* WebPSafeMalloc(uint64_t nmemb, size_t size) {  // NULL if rejected or OOM
+  if (!CheckSizeArguments(nmemb, size)) return NULL;  // request refused
+  return malloc((size_t)(nmemb * size));  // product fits size_t here
+}
+
+void* WebPSafeCalloc(uint64_t nmemb, size_t size) {  // NULL if rejected or OOM
+  if (!CheckSizeArguments(nmemb, size)) return NULL;  // request refused
+  return calloc((size_t)nmemb, size);  // nmemb * size was validated above
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/src/utils/utils.h b/src/utils/utils.h
new file mode 100644
index 00000000..a0347625
--- /dev/null
+++ b/src/utils/utils.h
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_UTILS_H_
+#define WEBP_UTILS_UTILS_H_
+
+#include "../webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Memory allocation
+
+// This is the maximum memory amount that libwebp will ever try to allocate.
+#define WEBP_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// size-checking safe malloc/calloc: verify that the requested size is not too
+// large, or return NULL. You don't need to call these for constructs like
+// malloc(sizeof(foo)), but only if there's picture-dependent size involved
+// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
+// safe malloc() borrows the signature from calloc(), pointing at the dangerous
+// underlying multiply involved.
+void* WebPSafeMalloc(uint64_t nmemb, size_t size);
+// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
+// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
+void* WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_UTILS_H_ */

From dd1c3873fe61f36b41cfa0f1d1193633fa599455 Mon Sep 17 00:00:00 2001
From: Vikas Arora <vikasa@google.com>
Date: Tue, 31 Jul 2012 23:07:52 -0700
Subject: [PATCH 14/42] Add image-hint for low-color images.

For low-color images, it may be better to not use color-palettes.
Users should treat this as yet another hint (as with Photo &
Picture) and another parameter for tuning the compression density.
The optimum compression can still be obtained by running (outer loop)
compression with all possible tunable parameters.

Change-Id: Icb1a4face2a84774e16e801aee4a8ae97e232e8a
---
 README            | 2 +-
 examples/cwebp.c  | 4 +++-
 man/cwebp.1       | 2 +-
 src/enc/config.c  | 2 +-
 src/enc/vp8l.c    | 3 +--
 src/webp/encode.h | 4 +++-
 6 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/README b/README
index 65a015c8..4896b24d 100644
--- a/README
+++ b/README
@@ -168,7 +168,7 @@ options:
   -noalpha ............... discard any transparency information.
   -lossless .............. Encode image losslessly.
   -hint <string> ......... Specify image characteristics hint.
-                           One of: photo or picture
+                           One of: photo, picture or graph.
 
   -short ................. condense printed message
   -quiet ................. don't print anything.
diff --git a/examples/cwebp.c b/examples/cwebp.c
index 2ca8d7e2..2348d272 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -844,7 +844,7 @@ static void HelpLong(void) {
   printf("  -noalpha ............... discard any transparency information.\n");
   printf("  -lossless .............. Encode image losslessly.\n");
   printf("  -hint <string> ......... Specify image characteristics hint.\n");
-  printf("                           One of: photo or picture\n");
+  printf("                           One of: photo, picture or graph\n");
 
   printf("\n");
   printf("  -short ................. condense printed message\n");
@@ -973,6 +973,8 @@ int main(int argc, const char *argv[]) {
         config.image_hint = WEBP_HINT_PHOTO;
       } else if (!strcmp(argv[c], "picture")) {
         config.image_hint = WEBP_HINT_PICTURE;
+      } else if (!strcmp(argv[c], "graph")) {
+        config.image_hint = WEBP_HINT_GRAPH;
       } else {
         fprintf(stderr, "Error! Unrecognized image hint: %s\n", argv[c]);
         goto Error;
diff --git a/man/cwebp.1 b/man/cwebp.1
index 181c315e..fab8517e 100644
--- a/man/cwebp.1
+++ b/man/cwebp.1
@@ -164,7 +164,7 @@ Encode the image without any loss.
 .TP
 .B \-hint string
 Specify the hint about input image type. Possible values are:
-\fBphoto\fP, and \fBpicture\fP.
+\fBphoto\fP, \fBpicture\fP or \fBgraph\fP.
 .TP
 .B \-noasm
 Disable all assembly optimizations.
diff --git a/src/enc/config.c b/src/enc/config.c
index fa11e89a..1a261135 100644
--- a/src/enc/config.c
+++ b/src/enc/config.c
@@ -120,7 +120,7 @@ int WebPValidateConfig(const WebPConfig* config) {
     return 0;
   if (config->lossless < 0 || config->lossless > 1)
     return 0;
-  if (config->image_hint > WEBP_HINT_PHOTO)
+  if (config->image_hint >= WEBP_HINT_LAST)
     return 0;
   return 1;
 }
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 82bef500..f40b5e3f 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -141,7 +141,7 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
   const WebPPicture* const pic = enc->pic_;
   assert(pic != NULL && pic->argb != NULL);
 
-  enc->use_palette_ =
+  enc->use_palette_ = (image_hint == WEBP_HINT_GRAPH) ? 0 :
       AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
   if (!enc->use_palette_) {
     if (image_hint == WEBP_HINT_DEFAULT) {
@@ -162,7 +162,6 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
   return 1;
 }
 
-
 static int GetHuffBitLengthsAndCodes(
     const VP8LHistogramSet* const histogram_image,
     HuffmanTreeCode* const huffman_codes) {
diff --git a/src/webp/encode.h b/src/webp/encode.h
index 1ee42819..8c89626c 100644
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@@ -69,7 +69,9 @@ WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
 typedef enum {
   WEBP_HINT_DEFAULT = 0,  // default preset.
   WEBP_HINT_PICTURE,      // digital picture, like portrait, inner shot
-  WEBP_HINT_PHOTO         // outdoor photograph, with natural lighting
+  WEBP_HINT_PHOTO,        // outdoor photograph, with natural lighting
+  WEBP_HINT_GRAPH,        // Discrete tone image (graph, map-tile etc).
+  WEBP_HINT_LAST
 } WebPImageHint;
 
 typedef struct {

From 906be65744e6a71ae21fcce5d77630f89cb4a628 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 1 Aug 2012 00:32:12 -0700
Subject: [PATCH 15/42] rationalize use of color-cache

* ~1-4% faster
* if it's not used, don't use it
* remove the special handling of cache_bits = 0
* remove some tests in the loops

Change-Id: I19d87c3ca731052ff532ea8b2d8e89816507b75f
---
 src/enc/backward_references.c | 104 ++++++++++++++++++++++------------
 src/utils/color_cache.c       |   6 +-
 2 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c
index 40819f99..7e317bec 100644
--- a/src/enc/backward_references.c
+++ b/src/enc/backward_references.c
@@ -256,8 +256,10 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
   VP8LColorCache hashers;
 
   if (hash_chain == NULL) return 0;
-  cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-  if (!cc_init) goto Error;
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
 
   if (!HashChainInit(hash_chain, pix_count)) goto Error;
 
@@ -289,15 +291,16 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
         HashChainFindCopy(hash_chain, quality,
                           i + 1, xsize, argb, maxlen, &offset2, &len2);
         if (len2 > len + 1) {
+          const uint32_t pixel = argb[i];
           // Alternative#2 is a better match. So push pixel at 'i' as literal.
-          if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
-            const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
+          if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+            const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
             refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
           } else {
-            refs->refs[refs->size] = PixOrCopyCreateLiteral(argb[i]);
+            refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
           }
           ++refs->size;
-          VP8LColorCacheInsert(&hashers, argb[i]);
+          if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
           i++;  // Backward reference to be done for next pixel.
           len = len2;
           offset = offset2;
@@ -307,24 +310,30 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
         len = MAX_LENGTH - 1;
       }
       refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len);
-      for (k = 0; k < len; ++k) {
-        VP8LColorCacheInsert(&hashers, argb[i + k]);
-        if (k != 0 && i + k + 1 < pix_count) {
-          // Add to the hash_chain (but cannot add the last pixel).
+      if (use_color_cache) {
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      // Add to the hash_chain (but cannot add the last pixel).
+      {
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 1; k < last; ++k) {
           HashChainInsert(hash_chain, &argb[i + k], i + k);
         }
       }
       i += len;
     } else {
-      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+      const uint32_t pixel = argb[i];
+      if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
         // push pixel as a PixOrCopyCreateCacheIdx pixel
-        int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
+        const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
         refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
       } else {
-        refs->refs[refs->size] = PixOrCopyCreateLiteral(argb[i]);
+        refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
       }
       ++refs->size;
-      VP8LColorCacheInsert(&hashers, argb[i]);
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
       if (i + 1 < pix_count) {
         HashChainInsert(hash_chain, &argb[i], i);
       }
@@ -437,8 +446,12 @@ static int BackwardReferencesHashChainDistanceOnly(
 
   if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
 
-  cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-  if (!cc_init || !HashChainInit(hash_chain, pix_count)) goto Error;
+  if (!HashChainInit(hash_chain, pix_count)) goto Error;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
 
   if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
                       cache_bits)) {
@@ -486,14 +499,20 @@ static int BackwardReferencesHashChainDistanceOnly(
           // Long copy for short distances, let's skip the middle
           // lookups for better copies.
           // 1) insert the hashes.
-          for (k = 0; k < len; ++k) {
-            VP8LColorCacheInsert(&hashers, argb[i + k]);
-            if (i + k + 1 < pix_count) {
-              // Add to the hash_chain (but cannot add the last pixel).
+          if (use_color_cache) {
+            for (k = 0; k < len; ++k) {
+              VP8LColorCacheInsert(&hashers, argb[i + k]);
+            }
+          }
+          // 2) Add to the hash_chain (but cannot add the last pixel)
+          {
+            const int last = (len < pix_count - 1 - i) ? len
+                                                       : pix_count - 1 - i;
+            for (k = 0; k < last; ++k) {
               HashChainInsert(hash_chain, &argb[i + k], i + k);
             }
           }
-          // 2) jump.
+          // 3) jump.
           i += len - 1;  // for loop does ++i, thus -1 here.
           goto next_symbol;
         }
@@ -515,7 +534,7 @@ static int BackwardReferencesHashChainDistanceOnly(
         cost[i] = cost_val;
         dist_array[i] = 1;  // only one is inserted.
       }
-      VP8LColorCacheInsert(&hashers, argb[i]);
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
     }
  next_symbol: ;
   }
@@ -574,11 +593,13 @@ static int BackwardReferencesHashChainFollowChosenPath(
   HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
   VP8LColorCache hashers;
 
-  if (hash_chain == NULL ||
-      !(cc_init = VP8LColorCacheInit(&hashers, cache_bits)) ||
-      !HashChainInit(hash_chain, pix_count)) {
+  if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) {
     goto Error;
   }
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
 
   refs->size = 0;
   for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
@@ -590,10 +611,14 @@ static int BackwardReferencesHashChainFollowChosenPath(
                         i, xsize, argb, maxlen, &offset, &len);
       assert(len == maxlen);
       refs->refs[size] = PixOrCopyCreateCopy(offset, len);
-      for (k = 0; k < len; ++k) {
-        VP8LColorCacheInsert(&hashers, argb[i + k]);
-        if (i + k + 1 < pix_count) {
-          // Add to the hash_chain (but cannot add the last pixel).
+      if (use_color_cache) {
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      {
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 0; k < last; ++k) {
           HashChainInsert(hash_chain, &argb[i + k], i + k);
         }
       }
@@ -606,7 +631,7 @@ static int BackwardReferencesHashChainFollowChosenPath(
       } else {
         refs->refs[size] = PixOrCopyCreateLiteral(argb[i]);
       }
-      VP8LColorCacheInsert(&hashers, argb[i]);
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
       if (i + 1 < pix_count) {
         HashChainInsert(hash_chain, &argb[i], i);
       }
@@ -755,13 +780,18 @@ static int ComputeCacheHistogram(const uint32_t* const argb,
   int i;
   uint32_t k;
   VP8LColorCache hashers;
+  const int use_color_cache = (cache_bits > 0);
+  int cc_init = 0;
 
-  if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) return 0;
+  }
 
   for (i = 0; i < refs->size; ++i) {
     const PixOrCopy* const v = &refs->refs[i];
     if (PixOrCopyIsLiteral(v)) {
-      if (cache_bits != 0 &&
+      if (use_color_cache &&
           VP8LColorCacheContains(&hashers, argb[pixel_index])) {
         // push pixel as a cache index
         const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
@@ -773,15 +803,17 @@ static int ComputeCacheHistogram(const uint32_t* const argb,
     } else {
       VP8LHistogramAddSinglePixOrCopy(histo, v);
     }
-    for (k = 0; k < PixOrCopyLength(v); ++k) {
-      VP8LColorCacheInsert(&hashers, argb[pixel_index]);
-      ++pixel_index;
+    if (use_color_cache) {
+      for (k = 0; k < PixOrCopyLength(v); ++k) {
+        VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+      }
     }
+    pixel_index += PixOrCopyLength(v);
   }
   assert(pixel_index == xsize * ysize);
   (void)xsize;  // xsize is not used in non-debug compilations otherwise.
   (void)ysize;  // ysize is not used in non-debug compilations otherwise.
-  VP8LColorCacheClear(&hashers);
+  if (cc_init) VP8LColorCacheClear(&hashers);
   return 1;
 }
 
diff --git a/src/utils/color_cache.c b/src/utils/color_cache.c
index 1bb360f1..1d20c9c9 100644
--- a/src/utils/color_cache.c
+++ b/src/utils/color_cache.c
@@ -21,11 +21,9 @@ extern "C" {
 // VP8LColorCache.
 
 int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
-  int hash_size;
+  const int hash_size = 1 << hash_bits;
   assert(cc != NULL);
-
-  if (hash_bits == 0) hash_bits = 1;
-  hash_size = 1 << hash_bits;
+  assert(hash_bits > 0);
   cc->colors_ = (uint32_t*)calloc(hash_size, sizeof(*cc->colors_));
   if (cc->colors_ == NULL) return 0;
   cc->hash_shift_ = 32 - hash_bits;

From c19333173ab5b3a7db155169f162af2939f36df3 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 1 Aug 2012 00:37:24 -0700
Subject: [PATCH 16/42] extra size check for security

No speed difference was observed after removing the capacity test that
preceded the call to BitWriterResize().

+ remove some unnecessary memset() in VP8LBitWriter
+ fix mixed code/variable-decl in BIG_ENDIAN mode

Change-Id: I36be61f83d10a43e4682b680c2dae0e494da4218
---
 src/utils/bit_writer.c | 74 ++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/src/utils/bit_writer.c b/src/utils/bit_writer.c
index fcb0a3c9..671159ca 100644
--- a/src/utils/bit_writer.c
+++ b/src/utils/bit_writer.c
@@ -25,18 +25,23 @@ extern "C" {
 static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
   uint8_t* new_buf;
   size_t new_size;
-  const size_t needed_size = bw->pos_ + extra_size;
+  const uint64_t needed_size_64b = (uint64_t)bw->pos_ + extra_size;
+  const size_t needed_size = (size_t)needed_size_64b;
+  if (needed_size_64b != needed_size) {
+    bw->error_ = 1;
+    return 0;
+  }
   if (needed_size <= bw->max_pos_) return 1;
+  // If the following line wraps over 32bit, the test just after will catch it.
   new_size = 2 * bw->max_pos_;
-  if (new_size < needed_size)
-    new_size = needed_size;
+  if (new_size < needed_size) new_size = needed_size;
   if (new_size < 1024) new_size = 1024;
   new_buf = (uint8_t*)malloc(new_size);
   if (new_buf == NULL) {
     bw->error_ = 1;
     return 0;
   }
-  if (bw->pos_ > 0) memcpy(new_buf, bw->buf_, bw->pos_);
+  memcpy(new_buf, bw->buf_, bw->pos_);
   free(bw->buf_);
   bw->buf_ = new_buf;
   bw->max_pos_ = new_size;
@@ -51,10 +56,8 @@ static void kFlush(VP8BitWriter* const bw) {
   bw->nb_bits_ -= 8;
   if ((bits & 0xff) != 0xff) {
     size_t pos = bw->pos_;
-    if (pos + bw->run_ >= bw->max_pos_) {  // reallocate
-      if (!BitWriterResize(bw,  bw->run_ + 1)) {
-        return;
-      }
+    if (!BitWriterResize(bw, bw->run_ + 1)) {
+      return;
     }
     if (bits & 0x100) {  // overflow -> propagate carry over pending 0xff's
       if (pos > 0) bw->buf_[pos - 1]++;
@@ -194,23 +197,28 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
 static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
   uint8_t* allocated_buf;
   size_t allocated_size;
-  const size_t size_required = VP8LBitWriterNumBytes(bw) + extra_size;
-  if ((bw->max_bytes_ > 0) && (size_required <= bw->max_bytes_)) return 1;
-  allocated_size = (3 * bw->max_bytes_) >> 1;
-  if (allocated_size < size_required) {
-    allocated_size = size_required;
+  const size_t current_size = VP8LBitWriterNumBytes(bw);
+  const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
+  const size_t size_required = (size_t)size_required_64b;
+  if (size_required != size_required_64b) {
+    bw->error_ = 1;
+    return 0;
   }
-  // Make Allocated size multiple of KBs
+  if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
+  allocated_size = (3 * bw->max_bytes_) >> 1;
+  if (allocated_size < size_required) allocated_size = size_required;
+  // make allocated size multiple of 1k
   allocated_size = (((allocated_size >> 10) + 1) << 10);
   allocated_buf = (uint8_t*)malloc(allocated_size);
-  if (allocated_buf == NULL) return 0;
-  memset(allocated_buf, 0, allocated_size);
-  if (bw->bit_pos_ > 0) {
-    memcpy(allocated_buf, bw->buf_, VP8LBitWriterNumBytes(bw));
+  if (allocated_buf == NULL) {
+    bw->error_ = 1;
+    return 0;
   }
+  memcpy(allocated_buf, bw->buf_, current_size);
   free(bw->buf_);
   bw->buf_ = allocated_buf;
   bw->max_bytes_ = allocated_size;
+  memset(allocated_buf + current_size, 0, allocated_size - current_size);
   return 1;
 }
 
@@ -232,33 +240,37 @@ void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
   // Technically, this branch of the code can write up to 25 bits at a time,
   // but in prefix encoding, the maximum number of bits written is 18 at a time.
   {
-    uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
-    uint32_t v = *(const uint32_t*)(p);
+    uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];
+    uint32_t v = *(const uint32_t*)p;
     v |= bits << (bw->bit_pos_ & 7);
-    *(uint32_t*)(p) = v;
+    *(uint32_t*)p = v;
     bw->bit_pos_ += n_bits;
   }
-#else  // LITTLE_ENDIAN
-  // implicit & 0xff is assumed for uint8_t arithmetics
+#else  // BIG_ENDIAN
   {
     uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
-    const int bits_reserved_in_first_byte = (bw->bit_pos_ & 7);
-    *p++ |= (bits << bits_reserved_in_first_byte);
+    const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
     const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+    // implicit & 0xff is assumed for uint8_t arithmetics
+    *p++ |= bits << bits_reserved_in_first_byte;
+    bits >>= 8 - bits_reserved_in_first_byte;
     if (bits_left_to_write >= 1) {
-      *p++ = bits >> (8 - bits_reserved_in_first_byte);
+      *p++ = bits;
+      bits >>= 8;
       if (bits_left_to_write >= 9) {
-        *p++ = bits >> (16 - bits_reserved_in_first_byte);
+        *p++ = bits;
+        bits >>= 8;
       }
     }
     assert(n_bits <= 25);
-    *p = bits >> (24 - bits_reserved_in_first_byte);
+    *p = bits;
     bw->bit_pos_ += n_bits;
   }
-#endif  // BIG_ENDIAN
+#endif
   if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {
-    const size_t kAdditionalBuffer = 32768 + bw->max_bytes_;
-    if (!VP8LBitWriterResize(bw, kAdditionalBuffer)) {
+    const uint64_t extra_size = 32768ULL + bw->max_bytes_;
+    if (extra_size != (size_t)extra_size ||
+        !VP8LBitWriterResize(bw, (size_t)extra_size)) {
       bw->bit_pos_ = 0;
       bw->error_ = 1;
     }

From bff34ac1ca0de6aa381da43d7e11e1e861b6dcca Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 1 Aug 2012 12:06:04 -0700
Subject: [PATCH 17/42] harness some malloc/calloc to use WebPSafeMalloc and
 WebPSafeCalloc

quite a large security sweep.

Change-Id: If150dfbb46e6e9b56210473a109c8ad6ccd0cea4
---
 src/dec/buffer.c              |  8 ++---
 src/dec/frame.c               |  4 ++-
 src/dec/idec.c                | 12 ++++----
 src/dec/vp8.c                 |  2 +-
 src/dec/vp8l.c                | 32 ++++++++------------
 src/enc/alpha.c               |  2 ++
 src/enc/analysis.c            |  4 ++-
 src/enc/backward_references.c | 14 +++++----
 src/enc/histogram.c           |  9 +++---
 src/enc/picture.c             | 31 +++++++++-----------
 src/enc/vp8l.c                | 55 ++++++++++++++++++-----------------
 src/enc/webpenc.c             | 18 +++++++-----
 src/utils/color_cache.c       |  4 ++-
 src/utils/huffman.c           |  8 +++--
 src/utils/huffman_encode.c    |  3 +-
 15 files changed, 108 insertions(+), 98 deletions(-)

diff --git a/src/dec/buffer.c b/src/dec/buffer.c
index e8421785..e491d4c4 100644
--- a/src/dec/buffer.c
+++ b/src/dec/buffer.c
@@ -13,6 +13,7 @@
 
 #include "./vp8i.h"
 #include "./webpi.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -95,14 +96,11 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
     total_size = size + 2 * uv_size + a_size;
 
     // Security/sanity checks
-    if (((size_t)total_size != total_size) || (total_size >= (1ULL << 40))) {
-      return VP8_STATUS_INVALID_PARAM;
-    }
-
-    buffer->private_memory = output = (uint8_t*)malloc((size_t)total_size);
+    output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
     if (output == NULL) {
       return VP8_STATUS_OUT_OF_MEMORY;
     }
+    buffer->private_memory = output;
 
     if (!WebPIsRGBMode(mode)) {   // YUVA initialization
       WebPYUVABuffer* const buf = &buffer->u.YUVA;
diff --git a/src/dec/frame.c b/src/dec/frame.c
index 1a444d13..9c91a48e 100644
--- a/src/dec/frame.c
+++ b/src/dec/frame.c
@@ -11,6 +11,7 @@
 
 #include <stdlib.h>
 #include "./vp8i.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -435,11 +436,12 @@ static int AllocateMemory(VP8Decoder* const dec) {
   if (needed > dec->mem_size_) {
     free(dec->mem_);
     dec->mem_size_ = 0;
-    dec->mem_ = (uint8_t*)malloc((size_t)needed);
+    dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
     if (dec->mem_ == NULL) {
       return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                          "no memory during frame initialization.");
     }
+    // down-cast is ok, thanks to WebPSafeMalloc() above.
     dec->mem_size_ = (size_t)needed;
   }
 
diff --git a/src/dec/idec.c b/src/dec/idec.c
index c7ab6f6e..7205991c 100644
--- a/src/dec/idec.c
+++ b/src/dec/idec.c
@@ -15,6 +15,7 @@
 
 #include "./webpi.h"
 #include "./vp8i.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -143,14 +144,15 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
 
   if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
     const size_t current_size = MemDataSize(mem);
-    const size_t new_size = current_size + data_size;
-    const size_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
-    uint8_t* const new_buf = (uint8_t*)malloc(extra_size);
+    const uint64_t new_size = (uint64_t)current_size + data_size;
+    const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
+    uint8_t* const new_buf =
+        (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
     if (new_buf == NULL) return 0;
     memcpy(new_buf, old_base, current_size);
     free(mem->buf_);
     mem->buf_ = new_buf;
-    mem->buf_size_ = extra_size;
+    mem->buf_size_ = (size_t)extra_size;
     mem->start_ = 0;
     mem->end_ = current_size;
   }
@@ -534,7 +536,7 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
 // Public functions
 
 WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
-  WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(WebPIDecoder));
+  WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec));
   if (idec == NULL) {
     return NULL;
   }
diff --git a/src/dec/vp8.c b/src/dec/vp8.c
index 5db7d546..b0ccfa2a 100644
--- a/src/dec/vp8.c
+++ b/src/dec/vp8.c
@@ -45,7 +45,7 @@ int VP8InitIoInternal(VP8Io* const io, int version) {
 }
 
 VP8Decoder* VP8New(void) {
-  VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(VP8Decoder));
+  VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(*dec));
   if (dec != NULL) {
     SetOk(dec);
     WebPWorkerInit(&dec->worker_);
diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index e5d5eec6..398faf91 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -15,6 +15,7 @@
 #include "./vp8li.h"
 #include "../dsp/lossless.h"
 #include "../utils/huffman.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -264,7 +265,8 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
       return 0;
     }
 
-    code_lengths = (int*)calloc(alphabet_size, sizeof(*code_lengths));
+    code_lengths =
+        (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths));
     if (code_lengths == NULL) {
       dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
       return 0;
@@ -335,7 +337,9 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
   if (br->error_) goto Error;
 
   assert(num_htree_groups <= 0x10000);
-  htree_groups = (HTreeGroup*)calloc(num_htree_groups, sizeof(*htree_groups));
+  htree_groups =
+      (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups,
+                                  sizeof(*htree_groups));
   if (htree_groups == NULL) {
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     goto Error;
@@ -380,10 +384,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
   const uint64_t memory_size = sizeof(*dec->rescaler) +
                                work_size * sizeof(*work) +
                                scaled_data_size * sizeof(*scaled_data);
-  uint8_t* memory;
-
-  if (memory_size != (size_t)memory_size) return 0;  // overflow check
-  memory = (uint8_t*)calloc(1, (size_t)memory_size);
+  uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
   if (memory == NULL) {
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
@@ -700,7 +701,8 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
   int i;
   const int final_num_colors = 1 << (8 >> transform->bits_);
   uint32_t* const new_color_map =
-      (uint32_t*)malloc(final_num_colors * sizeof(*new_color_map));
+      (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
+                                sizeof(*new_color_map));
   if (new_color_map == NULL) {
     return 0;
   } else {
@@ -892,15 +894,8 @@ static int DecodeImageStream(int xsize, int ysize,
   }
 
   {
-    const uint64_t total_size =
-        transform_xsize * transform_ysize * sizeof(*data);
-    if (total_size != (size_t)total_size) {
-      // This shouldn't happen, because of transform_bits limit, but...
-      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-      ok = 0;
-      goto End;
-    }
-    data = (uint32_t*)malloc((size_t)total_size);
+    const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
+    data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
     if (data == NULL) {
       dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
       ok = 0;
@@ -951,12 +946,9 @@ static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
   const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
   const uint64_t total_num_pixels =
       num_pixels + cache_top_pixels + cache_pixels;
-  const uint64_t total_size = total_num_pixels * sizeof(*dec->argb_);
 
   assert(dec->width_ <= final_width);
-  // Check for overflow
-  if ((size_t)total_size != total_size) return 0;
-  dec->argb_ = (uint32_t*)malloc((size_t)total_size);
+  dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
   if (dec->argb_ == NULL) {
     dec->argb_cache_ = NULL;
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index 97538da9..d8e12346 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -111,6 +111,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
   size_t expected_size;
   const size_t data_size = width * height;
 
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
   assert(filter >= 0 && filter < WEBP_FILTER_LAST);
   assert(method >= ALPHA_NO_COMPRESSION);
   assert(method <= ALPHA_LOSSLESS_COMPRESSION);
@@ -171,6 +172,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
   const int reduce_levels = (quality < 100);
 
   // quick sanity checks
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
   assert(enc != NULL && pic != NULL && pic->a != NULL);
   assert(output != NULL && output_size != NULL);
   assert(width > 0 && height > 0);
diff --git a/src/enc/analysis.c b/src/enc/analysis.c
index 0eec9472..9af0325e 100644
--- a/src/enc/analysis.c
+++ b/src/enc/analysis.c
@@ -15,6 +15,7 @@
 
 #include "./vp8enci.h"
 #include "./cost.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -35,7 +36,8 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
   const int w = enc->mb_w_;
   const int h = enc->mb_h_;
   const int majority_cnt_3_x_3_grid = 5;
-  uint8_t* const tmp = (uint8_t*)malloc(w * h * sizeof(uint8_t));
+  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp));
+  assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
 
   if (tmp == NULL) return;
   for (y = 1; y < h - 1; ++y) {
diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c
index 7e317bec..b20a1fcf 100644
--- a/src/enc/backward_references.c
+++ b/src/enc/backward_references.c
@@ -15,6 +15,7 @@
 #include "./backward_references.h"
 #include "./histogram.h"
 #include "../utils/color_cache.h"
+#include "../utils/utils.h"
 
 #define VALUES_IN_BYTE 256
 
@@ -93,7 +94,8 @@ int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) {
   assert(refs != NULL);
   refs->size = 0;
   refs->max_size = 0;
-  refs->refs = (PixOrCopy*)malloc(max_size * sizeof(*refs->refs));
+  refs->refs = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size,
+                                          sizeof(*refs->refs));
   if (refs->refs == NULL) return 0;
   refs->max_size = max_size;
   return 1;
@@ -110,7 +112,7 @@ static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
 
 static int HashChainInit(HashChain* const p, int size) {
   int i;
-  p->chain_ = (int*)malloc(size * sizeof(*p->chain_));
+  p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_));
   if (p->chain_ == NULL) {
     return 0;
   }
@@ -437,7 +439,8 @@ static int BackwardReferencesHashChainDistanceOnly(
   const int quality = 100;
   const int pix_count = xsize * ysize;
   const int use_color_cache = (cache_bits > 0);
-  double* const cost = (double*)malloc(pix_count * sizeof(*cost));
+  double* const cost =
+      (double*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost));
   CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model));
   HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
   VP8LColorCache hashers;
@@ -564,7 +567,8 @@ static int TraceBackwards(const uint32_t* const dist_array,
   }
   // Allocate.
   *chosen_path_size = count;
-  *chosen_path = (uint32_t*)malloc(count * sizeof(*chosen_path));
+  *chosen_path =
+      (uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path));
   if (*chosen_path == NULL) return 0;
 
   // Write in reverse order.
@@ -658,7 +662,7 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize,
   uint32_t* chosen_path = NULL;
   int chosen_path_size = 0;
   uint32_t* dist_array =
-      (uint32_t*)malloc(dist_array_size * sizeof(*dist_array));
+      (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array));
 
   if (dist_array == NULL) goto Error;
 
diff --git a/src/enc/histogram.c b/src/enc/histogram.c
index 6a35eda3..da3b3d74 100644
--- a/src/enc/histogram.c
+++ b/src/enc/histogram.c
@@ -17,6 +17,7 @@
 #include "./backward_references.h"
 #include "./histogram.h"
 #include "../dsp/lossless.h"
+#include "../utils/utils.h"
 
 #if defined(_MSC_VER) && !defined(NOT_HAVE_LOG2)
 # define NOT_HAVE_LOG2 1
@@ -65,10 +66,10 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
   int i;
   VP8LHistogramSet* set;
   VP8LHistogram* bulk;
-  const size_t total_size = sizeof(*set)
-                          + size * sizeof(*set->histograms)
-                          + size * sizeof(**set->histograms);
-  uint8_t* memory = (uint8_t*)malloc(total_size);
+  const uint64_t total_size = (uint64_t)sizeof(*set)
+                            + size * sizeof(*set->histograms)
+                            + size * sizeof(**set->histograms);
+  uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
   if (memory == NULL) return NULL;
 
   set = (VP8LHistogramSet*)memory;
diff --git a/src/enc/picture.c b/src/enc/picture.c
index f8ca19db..ce1b6cea 100644
--- a/src/enc/picture.c
+++ b/src/enc/picture.c
@@ -15,6 +15,7 @@
 
 #include "./vp8enci.h"
 #include "../utils/rescaler.h"
+#include "../utils/utils.h"
 #include "../dsp/dsp.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -81,14 +82,12 @@ int WebPPictureAlloc(WebPPicture* picture) {
 
       // Security and validation checks
       if (width <= 0 || height <= 0 ||         // luma/alpha param error
-          uv_width < 0 || uv_height < 0 ||     // u/v param error
-          y_size >= (1ULL << 40) ||            // reasonable global size
-          (size_t)total_size != total_size) {  // overflow on 32bit
+          uv_width < 0 || uv_height < 0) {     // u/v param error
         return 0;
       }
       // Clear previous buffer and allocate a new one.
       WebPPictureFree(picture);   // erase previous buffer
-      mem = (uint8_t*)malloc((size_t)total_size);
+      mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
       if (mem == NULL) return 0;
 
       // From now on, we're in the clear, we can no longer fail...
@@ -119,15 +118,12 @@ int WebPPictureAlloc(WebPPicture* picture) {
     } else {
       void* memory;
       const uint64_t argb_size = (uint64_t)width * height;
-      const uint64_t total_size = argb_size * sizeof(*picture->argb);
-      if (width <= 0 || height <= 0 ||
-          argb_size >= (1ULL << 40) ||
-          (size_t)total_size != total_size) {
+      if (width <= 0 || height <= 0) {
         return 0;
       }
       // Clear previous buffer and allocate a new one.
       WebPPictureFree(picture);   // erase previous buffer
-      memory = malloc((size_t)total_size);
+      memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
       if (memory == NULL) return 0;
 
       // TODO(skal): align plane to cache line?
@@ -416,7 +412,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
   if (!WebPPictureAlloc(&tmp)) return 0;
 
   if (!pic->use_argb) {
-    work = (int32_t*)malloc(2 * width * sizeof(*work));
+    work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
     if (work == NULL) {
       WebPPictureFree(&tmp);
       return 0;
@@ -449,7 +445,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
     }
 #endif
   } else {
-    work = (int32_t*)malloc(2 * width * 4 * sizeof(*work));
+    work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
     if (work == NULL) {
       WebPPictureFree(&tmp);
       return 0;
@@ -480,17 +476,17 @@ void WebPMemoryWriterInit(WebPMemoryWriter* writer) {
 int WebPMemoryWrite(const uint8_t* data, size_t data_size,
                     const WebPPicture* picture) {
   WebPMemoryWriter* const w = (WebPMemoryWriter*)picture->custom_ptr;
-  size_t next_size;
+  uint64_t next_size;
   if (w == NULL) {
     return 1;
   }
-  next_size = w->size + data_size;
+  next_size = (uint64_t)w->size + data_size;
   if (next_size > w->max_size) {
     uint8_t* new_mem;
-    size_t next_max_size = w->max_size * 2;
+    uint64_t next_max_size = 2ULL * w->max_size;
     if (next_max_size < next_size) next_max_size = next_size;
-    if (next_max_size < 8192) next_max_size = 8192;
-    new_mem = (uint8_t*)malloc(next_max_size);
+    if (next_max_size < 8192ULL) next_max_size = 8192ULL;
+    new_mem = (uint8_t*)WebPSafeMalloc(next_max_size, 1);
     if (new_mem == NULL) {
       return 0;
     }
@@ -499,7 +495,8 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size,
     }
     free(w->mem);
     w->mem = new_mem;
-    w->max_size = next_max_size;
+    // down-cast is ok, thanks to WebPSafeMalloc
+    w->max_size = (size_t)next_max_size;
   }
   if (data_size > 0) {
     memcpy(w->mem + w->size, data, data_size);
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index f40b5e3f..d4d90112 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -20,6 +20,7 @@
 #include "../dsp/lossless.h"
 #include "../utils/bit_writer.h"
 #include "../utils/huffman_encode.h"
+#include "../utils/utils.h"
 #include "../webp/format_constants.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -106,7 +107,8 @@ static int AnalyzeEntropy(const WebPPicture* const pic,
   uint32_t last_pix = argb[0];    // so we're sure that pix_diff == 0
 
   VP8LHistogram* nonpredicted = NULL;
-  VP8LHistogram* predicted = (VP8LHistogram*)malloc(2 * sizeof(*predicted));
+  VP8LHistogram* predicted =
+      (VP8LHistogram*)malloc(2 * sizeof(*predicted));
   if (predicted == NULL) return 0;
   nonpredicted = predicted + 1;
 
@@ -167,7 +169,7 @@ static int GetHuffBitLengthsAndCodes(
     HuffmanTreeCode* const huffman_codes) {
   int i, k;
   int ok = 1;
-  int total_length_size = 0;
+  uint64_t total_length_size = 0;
   uint8_t* mem_buf = NULL;
   const int histogram_image_size = histogram_image->size;
 
@@ -188,9 +190,8 @@ static int GetHuffBitLengthsAndCodes(
   {
     uint16_t* codes;
     uint8_t* lengths;
-    const size_t total_buf_size = total_length_size * sizeof(*lengths)
-                                + total_length_size * sizeof(*codes);
-    mem_buf = (uint8_t*)calloc(total_buf_size, 1);
+    mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size,
+                                       sizeof(*lengths) + sizeof(*codes));
     if (mem_buf == NULL) {
       ok = 0;
       goto End;
@@ -292,7 +293,7 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw,
   int num_tokens;
   HuffmanTreeCode huffman_code;
   HuffmanTreeToken* const tokens =
-      (HuffmanTreeToken*)malloc(max_tokens * sizeof(*tokens));
+      (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens));
   if (tokens == NULL) return 0;
 
   huffman_code.num_symbols = CODE_LENGTH_CODES;
@@ -499,21 +500,21 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
                                const uint32_t* const argb,
                                int width, int height, int quality,
                                int cache_bits, int histogram_bits) {
-  int i;
   int ok = 0;
   const int use_2d_locality = 1;
   const int use_color_cache = (cache_bits > 0);
-  const int histogram_image_xysize =
+  const uint32_t histogram_image_xysize =
       VP8LSubSampleSize(width, histogram_bits) *
       VP8LSubSampleSize(height, histogram_bits);
   VP8LHistogramSet* histogram_image =
       VP8LAllocateHistogramSet(histogram_image_xysize, 0);
   int histogram_image_size = 0;
-  int bit_array_size = 0;
+  size_t bit_array_size = 0;
   HuffmanTreeCode* huffman_codes = NULL;
   VP8LBackwardRefs refs;
   uint16_t* const histogram_symbols =
-      (uint16_t*)malloc(histogram_image_xysize * sizeof(*histogram_symbols));
+      (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+                                sizeof(*histogram_symbols));
   assert(histogram_bits >= MIN_HUFFMAN_BITS);
   assert(histogram_bits <= MAX_HUFFMAN_BITS);
   if (histogram_image == NULL || histogram_symbols == NULL) goto Error;
@@ -533,8 +534,8 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
   // Create Huffman bit lengths and codes for each histogram image.
   histogram_image_size = histogram_image->size;
   bit_array_size = 5 * histogram_image_size;
-  huffman_codes = (HuffmanTreeCode*)calloc(bit_array_size,
-                                           sizeof(*huffman_codes));
+  huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
+                                                   sizeof(*huffman_codes));
   if (huffman_codes == NULL ||
       !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
     goto Error;
@@ -552,8 +553,10 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
     VP8LWriteBits(bw, 1, write_histogram_image);
     if (write_histogram_image) {
       uint32_t* const histogram_argb =
-          (uint32_t*)malloc(histogram_image_xysize * sizeof(*histogram_argb));
+          (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+                                    sizeof(*histogram_argb));
       int max_index = 0;
+      uint32_t i;
       if (histogram_argb == NULL) goto Error;
       for (i = 0; i < histogram_image_xysize; ++i) {
         const int index = histogram_symbols[i] & 0xffff;
@@ -575,14 +578,14 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
   }
 
   // Store Huffman codes.
-  for (i = 0; i < 5 * histogram_image_size; ++i) {
-    HuffmanTreeCode* const codes = &huffman_codes[i];
-    if (!StoreHuffmanCode(bw, codes)) {
-      goto Error;
+  {
+    int i;
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      HuffmanTreeCode* const codes = &huffman_codes[i];
+      if (!StoreHuffmanCode(bw, codes)) goto Error;
+      ClearHuffmanTreeIfOnlyOneSymbol(codes);
     }
-    ClearHuffmanTreeIfOnlyOneSymbol(codes);
   }
-
   // Free combined histograms.
   free(histogram_image);
   histogram_image = NULL;
@@ -769,14 +772,14 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
                                                  int width, int height) {
   WebPEncodingError err = VP8_ENC_OK;
   const int tile_size = 1 << enc->transform_bits_;
-  const size_t image_size = width * height;
-  const size_t argb_scratch_size = tile_size * width + width;
-  const size_t transform_data_size =
-      VP8LSubSampleSize(width, enc->transform_bits_) *
-      VP8LSubSampleSize(height, enc->transform_bits_);
-  const size_t total_size =
+  const uint64_t image_size = width * height;
+  const uint64_t argb_scratch_size = tile_size * width + width;
+  const uint64_t transform_data_size =
+      (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
+      (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_);
+  const uint64_t total_size =
       image_size + argb_scratch_size + transform_data_size;
-  uint32_t* mem = (uint32_t*)malloc(total_size * sizeof(*mem));
+  uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
   if (mem == NULL) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index 8649ce2d..3c275589 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -16,6 +16,7 @@
 
 #include "./vp8enci.h"
 #include "./vp8li.h"
+#include "../utils/utils.h"
 
 // #define PRINT_MEMORY_INFO
 
@@ -164,13 +165,14 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
       config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
   VP8Encoder* enc;
   uint8_t* mem;
-  size_t size = sizeof(VP8Encoder) + ALIGN_CST  // main struct
-              + cache_size                      // working caches
-              + info_size                       // modes info
-              + preds_size                      // prediction modes
-              + samples_size                    // top/left samples
-              + nz_size                         // coeff context bits
-              + lf_stats_size;                  // autofilter stats
+  const uint64_t size = (uint64_t)sizeof(VP8Encoder)   // main struct
+                      + ALIGN_CST                      // cache alignment
+                      + cache_size                     // working caches
+                      + info_size                      // modes info
+                      + preds_size                     // prediction modes
+                      + samples_size                   // top/left samples
+                      + nz_size                        // coeff context bits
+                      + lf_stats_size;                 // autofilter stats
 
 #ifdef PRINT_MEMORY_INFO
   printf("===================================\n");
@@ -198,7 +200,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
          mb_w * mb_h * 384 * sizeof(uint8_t));
   printf("===================================\n");
 #endif
-  mem = (uint8_t*)malloc(size);
+  mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem));
   if (mem == NULL) {
     WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
     return NULL;
diff --git a/src/utils/color_cache.c b/src/utils/color_cache.c
index 1d20c9c9..560f81db 100644
--- a/src/utils/color_cache.c
+++ b/src/utils/color_cache.c
@@ -12,6 +12,7 @@
 #include <assert.h>
 #include <stdlib.h>
 #include "./color_cache.h"
+#include "../utils/utils.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -24,7 +25,8 @@ int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
   const int hash_size = 1 << hash_bits;
   assert(cc != NULL);
   assert(hash_bits > 0);
-  cc->colors_ = (uint32_t*)calloc(hash_size, sizeof(*cc->colors_));
+  cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
+                                          sizeof(*cc->colors_));
   if (cc->colors_ == NULL) return 0;
   cc->hash_shift_ = 32 - hash_bits;
   return 1;
diff --git a/src/utils/huffman.c b/src/utils/huffman.c
index 0ac8248e..41529cc9 100644
--- a/src/utils/huffman.c
+++ b/src/utils/huffman.c
@@ -12,6 +12,7 @@
 #include <assert.h>
 #include <stdlib.h>
 #include "./huffman.h"
+#include "../utils/utils.h"
 #include "../webp/format_constants.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -49,8 +50,8 @@ static int TreeInit(HuffmanTree* const tree, int num_leaves) {
   // Note that a Huffman tree is a full binary tree; and in a full binary tree
   // with L leaves, the total number of nodes N = 2 * L - 1.
   tree->max_nodes_ = 2 * num_leaves - 1;
-  tree->root_ =
-      (HuffmanTreeNode*)malloc(tree->max_nodes_ * sizeof(*tree->root_));
+  tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_,
+                                                 sizeof(*tree->root_));
   if (tree->root_ == NULL) return 0;
   TreeNodeInit(tree->root_);  // Initialize root.
   tree->num_nodes_ = 1;
@@ -173,7 +174,8 @@ int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
     int ok = 0;
 
     // Get Huffman codes from the code lengths.
-    int* const codes = (int*)malloc(code_lengths_size * sizeof(*codes));
+    int* const codes =
+        (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
     if (codes == NULL) goto End;
 
     if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
diff --git a/src/utils/huffman_encode.c b/src/utils/huffman_encode.c
index a78874fc..8ccd291d 100644
--- a/src/utils/huffman_encode.c
+++ b/src/utils/huffman_encode.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "./huffman_encode.h"
+#include "../utils/utils.h"
 #include "../webp/format_constants.h"
 
 // -----------------------------------------------------------------------------
@@ -196,7 +197,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
   // population and all the inserted nodes combining two existing nodes.
   // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
   // tree needs exactly tree_size_orig entities.
-  tree = (HuffmanTree*)malloc(3 * tree_size_orig * sizeof(*tree));
+  tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree));
   if (tree == NULL) return 0;
   tree_pool = tree + tree_size_orig;
 

From 323dc4d9b9d5fdbab554671f89de4f140d328cc3 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 1 Aug 2012 18:22:06 -0700
Subject: [PATCH 18/42] remove use of log2(). Use VP8LFastLog2() instead.

Cost ordering is mostly unchanged (costs are scaled by a constant 1/log(2)),
except for a few minor differences in < 2% of cases.

+ remove the unused field cache_bits_ from CostModel

Change-Id: I714f8ab12f49a23f5d499a64c741382c9b489a3e
---
 src/dsp/lossless.c            | 272 ++++++++++++++++++++--------------
 src/dsp/lossless.h            |   6 +-
 src/enc/backward_references.c |  70 +++++----
 src/enc/histogram.c           |  55 +------
 src/enc/histogram.h           |   3 -
 5 files changed, 214 insertions(+), 192 deletions(-)

diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index cb8ad0bc..9b446cf9 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -23,113 +23,156 @@ extern "C" {
 #include "../dsp/dsp.h"
 #include "../enc/histogram.h"
 
-// A lookup table for small values of log(int) to be used in entropy
-// computation.
-//
-// ", ".join(["%.16ff" % x for x in [0.0]+[log(x) for x in range(1, 256)]])
+#define MAX_DIFF_COST (1e30f)  // starting value for the best-cost searches
+
+// Lookup table of log2(v) for 0 <= v < LOG_LOOKUP_IDX_MAX (log2(0) := 0).
+#define APPROX_LOG_MAX  4096  // at or above this, VP8LFastLog2() calls log()
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086  // 1 / ln(2)
 #define LOG_LOOKUP_IDX_MAX 256
-static const float kLogTable[LOG_LOOKUP_IDX_MAX] = {
-  0.0000000000000000f, 0.0000000000000000f, 0.6931471805599453f,
-  1.0986122886681098f, 1.3862943611198906f, 1.6094379124341003f,
-  1.7917594692280550f, 1.9459101490553132f, 2.0794415416798357f,
-  2.1972245773362196f, 2.3025850929940459f, 2.3978952727983707f,
-  2.4849066497880004f, 2.5649493574615367f, 2.6390573296152584f,
-  2.7080502011022101f, 2.7725887222397811f, 2.8332133440562162f,
-  2.8903717578961645f, 2.9444389791664403f, 2.9957322735539909f,
-  3.0445224377234230f, 3.0910424533583161f, 3.1354942159291497f,
-  3.1780538303479458f, 3.2188758248682006f, 3.2580965380214821f,
-  3.2958368660043291f, 3.3322045101752038f, 3.3672958299864741f,
-  3.4011973816621555f, 3.4339872044851463f, 3.4657359027997265f,
-  3.4965075614664802f, 3.5263605246161616f, 3.5553480614894135f,
-  3.5835189384561099f, 3.6109179126442243f, 3.6375861597263857f,
-  3.6635616461296463f, 3.6888794541139363f, 3.7135720667043080f,
-  3.7376696182833684f, 3.7612001156935624f, 3.7841896339182610f,
-  3.8066624897703196f, 3.8286413964890951f, 3.8501476017100584f,
-  3.8712010109078911f, 3.8918202981106265f, 3.9120230054281460f,
-  3.9318256327243257f, 3.9512437185814275f, 3.9702919135521220f,
-  3.9889840465642745f, 4.0073331852324712f, 4.0253516907351496f,
-  4.0430512678345503f, 4.0604430105464191f, 4.0775374439057197f,
-  4.0943445622221004f, 4.1108738641733114f, 4.1271343850450917f,
-  4.1431347263915326f, 4.1588830833596715f, 4.1743872698956368f,
-  4.1896547420264252f, 4.2046926193909657f, 4.2195077051761070f,
-  4.2341065045972597f, 4.2484952420493594f, 4.2626798770413155f,
-  4.2766661190160553f, 4.2904594411483910f, 4.3040650932041702f,
-  4.3174881135363101f, 4.3307333402863311f, 4.3438054218536841f,
-  4.3567088266895917f, 4.3694478524670215f, 4.3820266346738812f,
-  4.3944491546724391f, 4.4067192472642533f, 4.4188406077965983f,
-  4.4308167988433134f, 4.4426512564903167f, 4.4543472962535073f,
-  4.4659081186545837f, 4.4773368144782069f, 4.4886363697321396f,
-  4.4998096703302650f, 4.5108595065168497f, 4.5217885770490405f,
-  4.5325994931532563f, 4.5432947822700038f, 4.5538768916005408f,
-  4.5643481914678361f, 4.5747109785033828f, 4.5849674786705723f,
-  4.5951198501345898f, 4.6051701859880918f, 4.6151205168412597f,
-  4.6249728132842707f, 4.6347289882296359f, 4.6443908991413725f,
-  4.6539603501575231f, 4.6634390941120669f, 4.6728288344619058f,
-  4.6821312271242199f, 4.6913478822291435f, 4.7004803657924166f,
-  4.7095302013123339f, 4.7184988712950942f, 4.7273878187123408f,
-  4.7361984483944957f, 4.7449321283632502f, 4.7535901911063645f,
-  4.7621739347977563f, 4.7706846244656651f, 4.7791234931115296f,
-  4.7874917427820458f, 4.7957905455967413f, 4.8040210447332568f,
-  4.8121843553724171f, 4.8202815656050371f, 4.8283137373023015f,
-  4.8362819069514780f, 4.8441870864585912f, 4.8520302639196169f,
-  4.8598124043616719f, 4.8675344504555822f, 4.8751973232011512f,
-  4.8828019225863706f, 4.8903491282217537f, 4.8978397999509111f,
-  4.9052747784384296f, 4.9126548857360524f, 4.9199809258281251f,
-  4.9272536851572051f, 4.9344739331306915f, 4.9416424226093039f,
-  4.9487598903781684f, 4.9558270576012609f, 4.9628446302599070f,
-  4.9698132995760007f, 4.9767337424205742f, 4.9836066217083363f,
-  4.9904325867787360f, 4.9972122737641147f, 5.0039463059454592f,
-  5.0106352940962555f, 5.0172798368149243f, 5.0238805208462765f,
-  5.0304379213924353f, 5.0369526024136295f, 5.0434251169192468f,
-  5.0498560072495371f, 5.0562458053483077f, 5.0625950330269669f,
-  5.0689042022202315f, 5.0751738152338266f, 5.0814043649844631f,
-  5.0875963352323836f, 5.0937502008067623f, 5.0998664278241987f,
-  5.1059454739005803f, 5.1119877883565437f, 5.1179938124167554f,
-  5.1239639794032588f, 5.1298987149230735f, 5.1357984370502621f,
-  5.1416635565026603f, 5.1474944768134527f, 5.1532915944977793f,
-  5.1590552992145291f, 5.1647859739235145f, 5.1704839950381514f,
-  5.1761497325738288f, 5.1817835502920850f, 5.1873858058407549f,
-  5.1929568508902104f, 5.1984970312658261f, 5.2040066870767951f,
-  5.2094861528414214f, 5.2149357576089859f, 5.2203558250783244f,
-  5.2257466737132017f, 5.2311086168545868f, 5.2364419628299492f,
-  5.2417470150596426f, 5.2470240721604862f, 5.2522734280466299f,
-  5.2574953720277815f, 5.2626901889048856f, 5.2678581590633282f,
-  5.2729995585637468f, 5.2781146592305168f, 5.2832037287379885f,
-  5.2882670306945352f, 5.2933048247244923f, 5.2983173665480363f,
-  5.3033049080590757f, 5.3082676974012051f, 5.3132059790417872f,
-  5.3181199938442161f, 5.3230099791384085f, 5.3278761687895813f,
-  5.3327187932653688f, 5.3375380797013179f, 5.3423342519648109f,
-  5.3471075307174685f, 5.3518581334760666f, 5.3565862746720123f,
-  5.3612921657094255f, 5.3659760150218512f, 5.3706380281276624f,
-  5.3752784076841653f, 5.3798973535404597f, 5.3844950627890888f,
-  5.3890717298165010f, 5.3936275463523620f, 5.3981627015177525f,
-  5.4026773818722793f, 5.4071717714601188f, 5.4116460518550396f,
-  5.4161004022044201f, 5.4205349992722862f, 5.4249500174814029f,
-  5.4293456289544411f, 5.4337220035542400f, 5.4380793089231956f,
-  5.4424177105217932f, 5.4467373716663099f, 5.4510384535657002f,
-  5.4553211153577017f, 5.4595855141441589f, 5.4638318050256105f,
-  5.4680601411351315f, 5.4722706736714750f, 5.4764635519315110f,
-  5.4806389233419912f, 5.4847969334906548f, 5.4889377261566867f,
-  5.4930614433405482f, 5.4971682252932021f, 5.5012582105447274f,
-  5.5053315359323625f, 5.5093883366279774f, 5.5134287461649825f,
-  5.5174528964647074f, 5.5214609178622460f, 5.5254529391317835f,
-  5.5294290875114234f, 5.5333894887275203f, 5.5373342670185366f,
-  5.5412635451584258f
+static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.0000000000000000f, 0.0000000000000000f,
+  1.0000000000000000f, 1.5849625007211560f,
+  2.0000000000000000f, 2.3219280948873621f,
+  2.5849625007211560f, 2.8073549220576041f,
+  3.0000000000000000f, 3.1699250014423121f,
+  3.3219280948873621f, 3.4594316186372973f,
+  3.5849625007211560f, 3.7004397181410921f,
+  3.8073549220576041f, 3.9068905956085187f,
+  4.0000000000000000f, 4.0874628412503390f,
+  4.1699250014423121f, 4.2479275134435852f,
+  4.3219280948873626f, 4.3923174227787606f,
+  4.4594316186372973f, 4.5235619560570130f,
+  4.5849625007211560f, 4.6438561897747243f,
+  4.7004397181410917f, 4.7548875021634682f,
+  4.8073549220576037f, 4.8579809951275718f,
+  4.9068905956085187f, 4.9541963103868749f,
+  5.0000000000000000f, 5.0443941193584533f,
+  5.0874628412503390f, 5.1292830169449663f,
+  5.1699250014423121f, 5.2094533656289501f,
+  5.2479275134435852f, 5.2854022188622487f,
+  5.3219280948873626f, 5.3575520046180837f,
+  5.3923174227787606f, 5.4262647547020979f,
+  5.4594316186372973f, 5.4918530963296747f,
+  5.5235619560570130f, 5.5545888516776376f,
+  5.5849625007211560f, 5.6147098441152083f,
+  5.6438561897747243f, 5.6724253419714951f,
+  5.7004397181410917f, 5.7279204545631987f,
+  5.7548875021634682f, 5.7813597135246599f,
+  5.8073549220576037f, 5.8328900141647412f,
+  5.8579809951275718f, 5.8826430493618415f,
+  5.9068905956085187f, 5.9307373375628866f,
+  5.9541963103868749f, 5.9772799234999167f,
+  6.0000000000000000f, 6.0223678130284543f,
+  6.0443941193584533f, 6.0660891904577720f,
+  6.0874628412503390f, 6.1085244567781691f,
+  6.1292830169449663f, 6.1497471195046822f,
+  6.1699250014423121f, 6.1898245588800175f,
+  6.2094533656289501f, 6.2288186904958804f,
+  6.2479275134435852f, 6.2667865406949010f,
+  6.2854022188622487f, 6.3037807481771030f,
+  6.3219280948873626f, 6.3398500028846243f,
+  6.3575520046180837f, 6.3750394313469245f,
+  6.3923174227787606f, 6.4093909361377017f,
+  6.4262647547020979f, 6.4429434958487279f,
+  6.4594316186372973f, 6.4757334309663976f,
+  6.4918530963296747f, 6.5077946401986963f,
+  6.5235619560570130f, 6.5391588111080309f,
+  6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211560f, 6.5999128421871278f,
+  6.6147098441152083f, 6.6293566200796094f,
+  6.6438561897747243f, 6.6582114827517946f,
+  6.6724253419714951f, 6.6865005271832185f,
+  6.7004397181410917f, 6.7142455176661224f,
+  6.7279204545631987f, 6.7414669864011464f,
+  6.7548875021634682f, 6.7681843247769259f,
+  6.7813597135246599f, 6.7944158663501061f,
+  6.8073549220576037f, 6.8201789624151878f,
+  6.8328900141647412f, 6.8454900509443747f,
+  6.8579809951275718f, 6.8703647195834047f,
+  6.8826430493618415f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745946f,
+  6.9307373375628866f, 6.9425145053392398f,
+  6.9541963103868749f, 6.9657842846620869f,
+  6.9772799234999167f, 6.9886846867721654f,
+  7.0000000000000000f, 7.0112272554232539f,
+  7.0223678130284543f, 7.0334230015374501f,
+  7.0443941193584533f, 7.0552824355011898f,
+  7.0660891904577720f, 7.0768155970508308f,
+  7.0874628412503390f, 7.0980320829605263f,
+  7.1085244567781691f, 7.1189410727235076f,
+  7.1292830169449663f, 7.1395513523987936f,
+  7.1497471195046822f, 7.1598713367783890f,
+  7.1699250014423121f, 7.1799090900149344f,
+  7.1898245588800175f, 7.1996723448363644f,
+  7.2094533656289501f, 7.2191685204621611f,
+  7.2288186904958804f, 7.2384047393250785f,
+  7.2479275134435852f, 7.2573878426926521f,
+  7.2667865406949010f, 7.2761244052742375f,
+  7.2854022188622487f, 7.2946207488916270f,
+  7.3037807481771030f, 7.3128829552843557f,
+  7.3219280948873626f, 7.3309168781146167f,
+  7.3398500028846243f, 7.3487281542310771f,
+  7.3575520046180837f, 7.3663222142458160f,
+  7.3750394313469245f, 7.3837042924740519f,
+  7.3923174227787606f, 7.4008794362821843f,
+  7.4093909361377017f, 7.4178525148858982f,
+  7.4262647547020979f, 7.4346282276367245f,
+  7.4429434958487279f, 7.4512111118323289f,
+  7.4594316186372973f, 7.4676055500829976f,
+  7.4757334309663976f, 7.4838157772642563f,
+  7.4918530963296747f, 7.4998458870832056f,
+  7.5077946401986963f, 7.5156998382840427f,
+  7.5235619560570130f, 7.5313814605163118f,
+  7.5391588111080309f, 7.5468944598876364f,
+  7.5545888516776376f, 7.5622424242210728f,
+  7.5698556083309478f, 7.5774288280357486f,
+  7.5849625007211560f, 7.5924570372680806f,
+  7.5999128421871278f, 7.6073303137496104f,
+  7.6147098441152083f, 7.6220518194563764f,
+  7.6293566200796094f, 7.6366246205436487f,
+  7.6438561897747243f, 7.6510516911789281f,
+  7.6582114827517946f, 7.6653359171851764f,
+  7.6724253419714951f, 7.6794800995054464f,
+  7.6865005271832185f, 7.6934869574993252f,
+  7.7004397181410917f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071855f,
+  7.7279204545631987f, 7.7347096202258383f,
+  7.7414669864011464f, 7.7481928495894605f,
+  7.7548875021634682f, 7.7615512324444795f,
+  7.7681843247769259f, 7.7747870596011736f,
+  7.7813597135246599f, 7.7879025593914317f,
+  7.7944158663501061f, 7.8008998999203047f,
+  7.8073549220576037f, 7.8137811912170374f,
+  7.8201789624151878f, 7.8265484872909150f,
+  7.8328900141647412f, 7.8392037880969436f,
+  7.8454900509443747f, 7.8517490414160571f,
+  7.8579809951275718f, 7.8641861446542797f,
+  7.8703647195834047f, 7.8765169465649993f,
+  7.8826430493618415f, 7.8887432488982591f,
+  7.8948177633079437f, 7.9008668079807486f,
+  7.9068905956085187f, 7.9128893362299619f,
+  7.9188632372745946f, 7.9248125036057812f,
+  7.9307373375628866f, 7.9366379390025709f,
+  7.9425145053392398f, 7.9483672315846778f,
+  7.9541963103868749f, 7.9600019320680805f,
+  7.9657842846620869f, 7.9715435539507719f,
+  7.9772799234999167f, 7.9829935746943103f,
+  7.9886846867721654f, 7.9943534368588577f
 };
 
-#define APPROX_LOG_MAX  4096
-#define LOG_2_BASE_E    0.6931471805599453f
-
-float VP8LFastLog(int v) {
-  if (v < APPROX_LOG_MAX) {
+float VP8LFastLog2(int v) {
+  if (v < LOG_LOOKUP_IDX_MAX) {  // small v: direct table lookup
+    return kLog2Table[v];  // NOTE(review): assumes v >= 0 -- confirm callers
+  } else if (v < APPROX_LOG_MAX) {  // halve v down into the table's range
     int log_cnt = 0;
     while (v >= LOG_LOOKUP_IDX_MAX) {
       ++log_cnt;
       v = v >> 1;
     }
-    return kLogTable[v] + (log_cnt * LOG_2_BASE_E);
+    return kLog2Table[v] + (float)log_cnt;  // +1 per halving; low bits dropped
+  } else {  // large v: use log() and rescale to base 2
+    return (float)(LOG_2_RECIPROCAL * log((double)v));
   }
-  return (float)log(v);
 }
 
 //------------------------------------------------------------------------------
@@ -284,7 +327,7 @@ static const PredictorFunc kPredictors[16] = {
 };
 
 // TODO(vikasa): Replace 256 etc with defines.
-static double PredictionCostSpatial(const int* counts,
+static float PredictionCostSpatial(const int* counts,
                                     int weight_0, double exp_val) {
   const int significant_symbols = 16;
   const double exp_decay_factor = 0.6;
@@ -294,27 +337,26 @@ static double PredictionCostSpatial(const int* counts,
     bits += exp_val * (counts[i] + counts[256 - i]);
     exp_val *= exp_decay_factor;
   }
-  return -0.1 * bits;
+  return (float)(-0.1 * bits);
 }
 
 // Compute the Shanon's entropy: Sum(p*log2(p))
-static double ShannonEntropy(const int* const array, int n) {
+static float ShannonEntropy(const int* const array, int n) {
   int i;
-  double retval = 0;
+  float retval = 0.f;  // running total, built from VP8LFastSLog2() terms
   int sum = 0;
   for (i = 0; i < n; ++i) {
     if (array[i] != 0) {
       sum += array[i];
-      retval += array[i] * VP8LFastLog(array[i]);
+      retval -= VP8LFastSLog2(array[i]);  // subtract a[i] * log2(a[i])
     }
   }
-  retval -= sum * VP8LFastLog(sum);
-  retval *= -1.4426950408889634;  // 1.0 / -FastLog(2);
+  retval += VP8LFastSLog2(sum);  // sum*log2(sum) - Sum(a[i]*log2(a[i]))
   return retval;
 }
 
-static double PredictionCostSpatialHistogram(int accumulated[4][256],
-                                             int tile[4][256]) {
+static float PredictionCostSpatialHistogram(int accumulated[4][256],
+                                            int tile[4][256]) {
   int i;
   int k;
   int combo[256];
@@ -328,7 +370,7 @@ static double PredictionCostSpatialHistogram(int accumulated[4][256],
     }
     retval += ShannonEntropy(&combo[0], 256);
   }
-  return retval;
+  return (float)retval;
 }
 
 static int GetBestPredictorForTile(int width, int height,
@@ -344,14 +386,14 @@ static int GetBestPredictorForTile(int width, int height,
   const int xmax = (tile_size <= width - col_start) ?
       tile_size : width - col_start;
   int histo[4][256];
-  double best_diff = 1e99;
+  float best_diff = MAX_DIFF_COST;
   int best_mode = 0;
 
   int mode;
   for (mode = 0; mode < kNumPredModes; ++mode) {
     const uint32_t* current_row = argb_scratch;
     const PredictorFunc pred_func = kPredictors[mode];
-    double cur_diff;
+    float cur_diff;
     int y;
     memset(&histo[0][0], 0, sizeof(histo));
     for (y = 0; y < ymax; ++y) {
@@ -630,8 +672,8 @@ static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
   return 0;
 }
 
-static double PredictionCostCrossColor(const int accumulated[256],
-                                       const int counts[256]) {
+static float PredictionCostCrossColor(const int accumulated[256],
+                                      const int counts[256]) {
   // Favor low entropy, locally and globally.
   int i;
   int combo[256];
@@ -651,8 +693,8 @@ static Multipliers GetBestColorTransformForTile(
     int* accumulated_red_histo,
     int* accumulated_blue_histo,
     const uint32_t* const argb) {
-  double best_diff = 1e99;
-  double cur_diff;
+  float best_diff = MAX_DIFF_COST;
+  float cur_diff;
   const int halfstep = step / 2;
   const int max_tile_size = 1 << bits;
   const int tile_y_offset = tile_y * max_tile_size;
@@ -704,7 +746,7 @@ static Multipliers GetBestColorTransformForTile(
       best_tx = tx;
     }
   }
-  best_diff = 1e99;
+  best_diff = MAX_DIFF_COST;
   green_to_red = best_tx.green_to_red_;
   for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
     for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
index f00e90e0..992516fc 100644
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -59,8 +59,10 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
   return (size + (1 << sampling_bits) - 1) >> sampling_bits;
 }
 
-// Faster logarithm for small integers, with the property of log(0) == 0.
-float VP8LFastLog(int v);
+// Fast base-2 logarithm for integers; by convention log2(0) == 0.
+float VP8LFastLog2(int v);
+// Fast calculation of v * log2(v) for integer input (0 for v == 0 or 1).
+static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
 
 // In-place difference of each component with mod 256.
 static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c
index b20a1fcf..b8c8ece8 100644
--- a/src/enc/backward_references.c
+++ b/src/enc/backward_references.c
@@ -14,6 +14,7 @@
 
 #include "./backward_references.h"
 #include "./histogram.h"
+#include "../dsp/lossless.h"
 #include "../utils/color_cache.h"
 #include "../utils/utils.h"
 
@@ -357,46 +358,65 @@ typedef struct {
   double literal_[PIX_OR_COPY_CODES_MAX];
   double blue_[VALUES_IN_BYTE];
   double distance_[NUM_DISTANCE_CODES];
-  int cache_bits_;
 } CostModel;
 
 static int BackwardReferencesTraceBackwards(
     int xsize, int ysize, int recursive_cost_model,
     const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
 
-static int CostModelBuild(CostModel* const p, int xsize, int ysize,
+static void ConvertPopulationCountTableToBitEstimates(
+    int num_symbols, const int population_counts[], double output[]) {
+  int sum = 0;  // total of all population counts
+  int nonzeros = 0;  // how many symbols have a positive count
+  int i;
+  for (i = 0; i < num_symbols; ++i) {
+    sum += population_counts[i];
+    if (population_counts[i] > 0) {
+      ++nonzeros;
+    }
+  }
+  if (nonzeros <= 1) {  // at most one symbol used: all estimates are 0
+    memset(output, 0, num_symbols * sizeof(*output));
+  } else {
+    const double logsum = VP8LFastLog2(sum);
+    for (i = 0; i < num_symbols; ++i) {  // output[i] = log2(sum / count[i])
+      output[i] = logsum - VP8LFastLog2(population_counts[i]);
+    }  // a zero count yields logsum, since VP8LFastLog2(0) == 0
+  }
+}
+
+static int CostModelBuild(CostModel* const m, int xsize, int ysize,
                           int recursion_level, const uint32_t* const argb,
                           int cache_bits) {
   int ok = 0;
   VP8LHistogram histo;
   VP8LBackwardRefs refs;
+  const int quality = 100;
 
   if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
 
-  p->cache_bits_ = cache_bits;
   if (recursion_level > 0) {
     if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
                                           argb, cache_bits, &refs)) {
       goto Error;
     }
   } else {
-    const int quality = 100;
     if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
                                      &refs)) {
       goto Error;
     }
   }
   VP8LHistogramCreate(&histo, &refs, cache_bits);
-  VP8LConvertPopulationCountTableToBitEstimates(
-      VP8LHistogramNumCodes(&histo), histo.literal_, p->literal_);
-  VP8LConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.red_, p->red_);
-  VP8LConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.blue_, p->blue_);
-  VP8LConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.alpha_, p->alpha_);
-  VP8LConvertPopulationCountTableToBitEstimates(
-      NUM_DISTANCE_CODES, histo.distance_, p->distance_);
+  ConvertPopulationCountTableToBitEstimates(
+      VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.red_, m->red_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.blue_, m->blue_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.alpha_, m->alpha_);
+  ConvertPopulationCountTableToBitEstimates(
+      NUM_DISTANCE_CODES, histo.distance_, m->distance_);
   ok = 1;
 
  Error:
@@ -404,30 +424,30 @@ static int CostModelBuild(CostModel* const p, int xsize, int ysize,
   return ok;
 }
 
-static WEBP_INLINE double GetLiteralCost(const CostModel* const p, uint32_t v) {
-  return p->alpha_[v >> 24] +
-      p->red_[(v >> 16) & 0xff] +
-      p->literal_[(v >> 8) & 0xff] +
-      p->blue_[v & 0xff];
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+  return m->alpha_[v >> 24] +  // sum of the four per-byte costs of v
+         m->red_[(v >> 16) & 0xff] +
+         m->literal_[(v >> 8) & 0xff] +  // byte 1 is costed via literal_[]
+         m->blue_[v & 0xff];
 }
 
-static WEBP_INLINE double GetCacheCost(const CostModel* const p, uint32_t idx) {
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
-  return p->literal_[literal_idx];
+  return m->literal_[literal_idx];  // cache entries follow the length codes
 }
 
-static WEBP_INLINE double GetLengthCost(const CostModel* const p,
+static WEBP_INLINE double GetLengthCost(const CostModel* const m,
                                         uint32_t length) {
   int code, extra_bits_count, extra_bits_value;
   PrefixEncode(length, &code, &extra_bits_count, &extra_bits_value);
-  return p->literal_[VALUES_IN_BYTE + code] + extra_bits_count;
+  return m->literal_[VALUES_IN_BYTE + code] + extra_bits_count;  // + extra bits
 }
 
-static WEBP_INLINE double GetDistanceCost(const CostModel* const p,
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
                                           uint32_t distance) {
   int code, extra_bits_count, extra_bits_value;
   PrefixEncode(distance, &code, &extra_bits_count, &extra_bits_value);
-  return p->distance_[code] + extra_bits_count;
+  return m->distance_[code] + extra_bits_count;  // prefix-code cost + extra bits
 }
 
 static int BackwardReferencesHashChainDistanceOnly(
diff --git a/src/enc/histogram.c b/src/enc/histogram.c
index da3b3d74..ca838e06 100644
--- a/src/enc/histogram.c
+++ b/src/enc/histogram.c
@@ -19,17 +19,6 @@
 #include "../dsp/lossless.h"
 #include "../utils/utils.h"
 
-#if defined(_MSC_VER) && !defined(NOT_HAVE_LOG2)
-# define NOT_HAVE_LOG2 1
-#endif
-
-#ifdef NOT_HAVE_LOG2
-static WEBP_INLINE double log2(double d) {
-  const double kLog2Reciprocal = 1.442695040888963;
-  return log(d) * kLog2Reciprocal;
-}
-#endif
-
 static void HistogramClear(VP8LHistogram* const p) {
   memset(p->literal_, 0, sizeof(p->literal_));
   memset(p->red_, 0, sizeof(p->red_));
@@ -88,33 +77,6 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
 
 // -----------------------------------------------------------------------------
 
-void VP8LConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const int population_counts[], double output[]) {
-  int sum = 0;
-  int nonzeros = 0;
-  int i;
-  for (i = 0; i < num_symbols; ++i) {
-    sum += population_counts[i];
-    if (population_counts[i] > 0) {
-      ++nonzeros;
-    }
-  }
-  if (nonzeros <= 1) {
-    memset(output, 0, num_symbols * sizeof(*output));
-    return;
-  }
-  {
-    const double log2sum = log2(sum);
-    for (i = 0; i < num_symbols; ++i) {
-      if (population_counts[i] == 0) {
-        output[i] = log2sum;
-      } else {
-        output[i] = log2sum - log2(population_counts[i]);
-      }
-    }
-  }
-}
-
 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
                                      const PixOrCopy* const v) {
   if (PixOrCopyIsLiteral(v)) {
@@ -139,7 +101,7 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
 
 
 static double BitsEntropy(const int* const array, int n) {
-  double retval = 0;
+  double retval = 0.;
   int sum = 0;
   int nonzeros = 0;
   int max_val = 0;
@@ -149,15 +111,14 @@ static double BitsEntropy(const int* const array, int n) {
     if (array[i] != 0) {
       sum += array[i];
       ++nonzeros;
-      retval += array[i] * VP8LFastLog(array[i]);
+      retval -= VP8LFastSLog2(array[i]);
       if (max_val < array[i]) {
         max_val = array[i];
       }
     }
   }
-  retval -= sum * VP8LFastLog(sum);
-  retval *= -1.4426950408889634;  // 1.0 / -Log(2);
-  mix = 0.627;
+  retval += VP8LFastSLog2(sum);
+
   if (nonzeros < 5) {
     if (nonzeros <= 1) {
       return 0;
@@ -177,15 +138,15 @@ static double BitsEntropy(const int* const array, int n) {
     } else {
       mix = 0.7;  // nonzeros == 4.
     }
+  } else {
+    mix = 0.627;
   }
+
   {
     double min_limit = 2 * sum - max_val;
     min_limit = mix * min_limit + (1.0 - mix) * retval;
-    if (retval < min_limit) {
-      return min_limit;
-    }
+    return (retval < min_limit) ? min_limit : retval;
   }
-  return retval;
 }
 
 double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
diff --git a/src/enc/histogram.h b/src/enc/histogram.h
index 480aba81..ec573c5c 100644
--- a/src/enc/histogram.h
+++ b/src/enc/histogram.h
@@ -101,9 +101,6 @@ static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
       ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);
 }
 
-void VP8LConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const int population_counts[], double output[]);
-
 // Builds the histogram image.
 int VP8LGetHistoImageSymbols(int xsize, int ysize,
                              const VP8LBackwardRefs* const refs,

From 9b261bf5217002b486f5c1611ba334529dd29db9 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Wed, 1 Aug 2012 18:48:19 -0700
Subject: [PATCH 19/42] remove the last NOT_HAVE_LOG2 instances

Change-Id: I193ecf82316cd1d5d7ddeebebf8fc98afccf0ede
---
 Android.mk   | 2 +-
 Makefile.vc  | 2 +-
 configure.ac | 5 -----
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/Android.mk b/Android.mk
index 8daa508d..06418196 100644
--- a/Android.mk
+++ b/Android.mk
@@ -50,7 +50,7 @@ LOCAL_SRC_FILES := \
     src/utils/utils.c \
 
 LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD \
-                -DNOT_HAVE_LOG2 -DWEBP_USE_THREAD \
+                -DWEBP_USE_THREAD \
                 -finline-functions -frename-registers -ffast-math \
                 -s -fomit-frame-pointer -Isrc/webp
 
diff --git a/Makefile.vc b/Makefile.vc
index c69b62dc..c7447d83 100644
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -27,7 +27,7 @@ CCNODBG    = cl.exe /nologo /O2 /DNDEBUG
 CCDEBUG    = cl.exe /nologo /Od /Gm /Zi /D_DEBUG /RTC1
 CFLAGS     = /Isrc /nologo /W3 /EHsc /FD /c /GS
 CFLAGS     = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
-CFLAGS     = $(CFLAGS) /DHAVE_WINCODEC_H /DWEBP_USE_THREAD /DNOT_HAVE_LOG2
+CFLAGS     = $(CFLAGS) /DHAVE_WINCODEC_H /DWEBP_USE_THREAD
 LDFLAGS    = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
 LDFLAGS    = $(LDFLAGS) $(PLATFORM_LDFLAGS)
 LNKDLL     = link.exe /DLL
diff --git a/configure.ac b/configure.ac
index 09559681..932b4f30 100644
--- a/configure.ac
+++ b/configure.ac
@@ -29,11 +29,6 @@ AC_DEFUN([WITHLIB_OPTION],
                               [use $2 libraries from DIR]),
                [$2_LIBS="-L$withval"])])
 
-dnl === Check for native log2
-AC_SEARCH_LIBS([log2], [m],,
-               [AC_DEFINE([NOT_HAVE_LOG2], [1],
-                          [Undefine this if you have log2().])])
-
 dnl === Check for pthread support
 AC_ARG_ENABLE([threading],
               AS_HELP_STRING([--disable-threading],

From 4af3f6c4d3b154f9961b2f740e6971c8700d56c0 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Thu, 2 Aug 2012 11:55:36 -0700
Subject: [PATCH 20/42] fix indentation

Change-Id: Ib00b3cdc21ac336a56390f1e71c169e7fd4767a6
---
 src/dsp/lossless.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 9b446cf9..472e641e 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -328,7 +328,7 @@ static const PredictorFunc kPredictors[16] = {
 
 // TODO(vikasa): Replace 256 etc with defines.
 static float PredictionCostSpatial(const int* counts,
-                                    int weight_0, double exp_val) {
+                                   int weight_0, double exp_val) {
   const int significant_symbols = 16;
   const double exp_decay_factor = 0.6;
   double bits = weight_0 * counts[0];

From 292ec5cc7dbabba78b8011e6fe24e5988b782f25 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 2 Aug 2012 14:03:30 -0700
Subject: [PATCH 21/42] quiet a few 'uninitialized' warnings

spurious in this case, but addresses e.g.,
... potentially uninitialized local variable 'weighted_average' used

Change-Id: Ib99998bf49e4af7a82ee66f13fb850ca5b17dc71
---
 examples/cwebp.c   | 2 +-
 src/enc/alpha.c    | 2 +-
 src/enc/analysis.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cwebp.c b/examples/cwebp.c
index ef84a426..8441e94c 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -132,7 +132,7 @@ static HRESULT ReadPictureWithWIC(const char* filename,
   IWICBitmapDecoder* pDecoder = NULL;
   IStream* pStream = NULL;
   UINT frameCount = 0;
-  UINT width, height = 0;
+  UINT width = 0, height = 0;
   BYTE* rgb = NULL;
   WICPixelFormatGUID srcPixelFormat = { 0 };
   GUID srcContainerFormat = { 0 };
diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index e012cb6d..2b36733c 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -234,7 +234,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
 
     // Try the other mode(s).
     {
-      WebPAuxStats best_stats;
+      WebPAuxStats best_stats = { 0 };
       size_t best_score = VP8BitWriterSize(&bw);
       if (pic->stats != NULL) best_stats = *pic->stats;
       for (test_filter = WEBP_FILTER_HORIZONTAL;
diff --git a/src/enc/analysis.c b/src/enc/analysis.c
index 9af0325e..22cfb492 100644
--- a/src/enc/analysis.c
+++ b/src/enc/analysis.c
@@ -147,7 +147,7 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
 static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
   const int nb = enc->segment_hdr_.num_segments_;
   int centers[NUM_MB_SEGMENTS];
-  int weighted_average;
+  int weighted_average = 0;
   int map[256];
   int a, n, k;
   int min_a = 0, max_a = 255, range_a;

From c9ae57f59692ca8fdff62d4f7ea6be972680d390 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 2 Aug 2012 15:10:31 -0700
Subject: [PATCH 22/42] man/dwebp.1: add links to output file format details

Change-Id: I30e3e52e428c9e68ba2ec263024a1edc56ad6741
---
 man/dwebp.1 | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/man/dwebp.1 b/man/dwebp.1
index b8e843e1..5a559a81 100644
--- a/man/dwebp.1
+++ b/man/dwebp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH DWEBP 1 "July 20, 2012"
+.TH DWEBP 1 "August 2, 2012"
 .SH NAME
 dwebp \- decompress a WebP file to an image file
 .SH SYNOPSIS
@@ -92,3 +92,11 @@ for the Debian project (and may be used by others).
 .br
 Please refer to http://developers.google.com/speed/webp/ for additional
 information.
+.SS Output file format details
+PAM: http://netpbm.sourceforge.net/doc/pam.html
+.br
+PGM: http://netpbm.sourceforge.net/doc/pgm.html
+.br
+PPM: http://netpbm.sourceforge.net/doc/ppm.html
+.br
+PNG: http://www.libpng.org/pub/png/png-sitemap.html#info

From 4b71ba035ac796054f1482246cb494efabcd2c9e Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 2 Aug 2012 16:11:02 -0700
Subject: [PATCH 23/42] README: sync [cd]webp help output

Change-Id: Ic54e0f3e5e2e667adb369321e5849890d3b96e42
dwebp: -pam, -alpha
---
 README | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README b/README
index 4896b24d..1882b134 100644
--- a/README
+++ b/README
@@ -168,7 +168,7 @@ options:
   -noalpha ............... discard any transparency information.
   -lossless .............. Encode image losslessly.
   -hint <string> ......... Specify image characteristics hint.
-                           One of: photo, picture or graph.
+                           One of: photo, picture or graph
 
   -short ................. condense printed message
   -quiet ................. don't print anything.
@@ -231,7 +231,8 @@ Usage: dwebp in_file [options] [-o out_file]
 
 Decodes the WebP image file to PNG format [Default]
 Use following options to convert into alternate image formats:
-  -ppm ......... save the raw RGB samples as color PPM
+  -pam ......... save the raw RGBA samples as a color PAM
+  -ppm ......... save the raw RGB samples as a color PPM
   -pgm ......... save the raw YUV samples as a grayscale PGM
                  file with IMC4 layout.
  Other options are:
@@ -241,6 +242,7 @@ Use following options to convert into alternate image formats:
   -mt .......... use multi-threading
   -crop <x> <y> <w> <h> ... crop output with the given rectangle
   -scale <w> <h> .......... scale the output (*after* any cropping)
+  -alpha ....... only save the alpha plane.
   -h     ....... this help message.
   -v     ....... verbose (e.g. print encoding/decoding times)
   -noasm ....... disable all assembly optimizations.

From f94b04f04591b044fef0347fb68a2c404e85dd50 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Thu, 2 Aug 2012 17:23:02 -0700
Subject: [PATCH 24/42] move some RGB->YUV functions to yuv.h

will be needed later

Change-Id: I6b9e460db2d398b9fecd5d3c1bbdb3f2f3d4f5db
---
 src/dsp/yuv.h     | 33 ++++++++++++++++++++++++++++++++-
 src/enc/picture.c | 40 +++++++---------------------------------
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h
index c662af67..22cb2596 100644
--- a/src/dsp/yuv.h
+++ b/src/dsp/yuv.h
@@ -5,7 +5,7 @@
 //  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
 // -----------------------------------------------------------------------------
 //
-// inline YUV->RGB conversion function
+// inline YUV<->RGB conversion function
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
@@ -14,6 +14,9 @@
 
 #include "../dec/decode_vp8.h"
 
+//------------------------------------------------------------------------------
+// YUV -> RGB conversion
+
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
@@ -95,6 +98,34 @@ static WEBP_INLINE uint32_t VP8Clip4Bits(uint8_t c) {
 // Must be called before everything, to initialize the tables.
 void VP8YUVInit(void);
 
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16bit fixed point operations.
+
+static WEBP_INLINE int VP8ClipUV(int v) {
+   v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
+   return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
+  const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
+  const int luma = 16839 * r + 33059 * g + 6420 * b;
+  return (luma + kRound) >> YUV_FIX;  // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
+  return VP8ClipUV(-9719 * r - 19081 * g + 28800 * b);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
+  return VP8ClipUV(+28800 * r - 24116 * g - 4684 * b);
+}
+
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"
 #endif
diff --git a/src/enc/picture.c b/src/enc/picture.c
index ce1b6cea..44eed060 100644
--- a/src/enc/picture.c
+++ b/src/enc/picture.c
@@ -17,6 +17,7 @@
 #include "../utils/rescaler.h"
 #include "../utils/utils.h"
 #include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -544,33 +545,6 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
 
 //------------------------------------------------------------------------------
 // RGB -> YUV conversion
-// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
-// More information at: http://en.wikipedia.org/wiki/YCbCr
-// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
-// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
-// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
-// We use 16bit fixed point operations.
-
-enum { YUV_FRAC = 16 };
-
-static WEBP_INLINE int clip_uv(int v) {
-   v = (v + (257 << (YUV_FRAC + 2 - 1))) >> (YUV_FRAC + 2);
-   return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
-}
-
-static WEBP_INLINE int rgb_to_y(int r, int g, int b) {
-  const int kRound = (1 << (YUV_FRAC - 1)) + (16 << YUV_FRAC);
-  const int luma = 16839 * r + 33059 * g + 6420 * b;
-  return (luma + kRound) >> YUV_FRAC;  // no need to clip
-}
-
-static WEBP_INLINE int rgb_to_u(int r, int g, int b) {
-  return clip_uv(-9719 * r - 19081 * g + 28800 * b);
-}
-
-static WEBP_INLINE int rgb_to_v(int r, int g, int b) {
-  return clip_uv(+28800 * r - 24116 * g - 4684 * b);
-}
 
 // TODO: we can do better than simply 2x2 averaging on U/V samples.
 #define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \
@@ -584,8 +558,8 @@ static WEBP_INLINE int rgb_to_v(int r, int g, int b) {
   const int r = SUM(r_ptr + src);                        \
   const int g = SUM(g_ptr + src);                        \
   const int b = SUM(b_ptr + src);                        \
-  picture->u[dst] = rgb_to_u(r, g, b);                   \
-  picture->v[dst] = rgb_to_v(r, g, b);                   \
+  picture->u[dst] = VP8RGBToU(r, g, b);                  \
+  picture->v[dst] = VP8RGBToV(r, g, b);                  \
 }
 
 #define RGB_TO_UV0(x_in, x_out, y, SUM) {                \
@@ -594,8 +568,8 @@ static WEBP_INLINE int rgb_to_v(int r, int g, int b) {
   const int r = SUM(r_ptr + src);                        \
   const int g = SUM(g_ptr + src);                        \
   const int b = SUM(b_ptr + src);                        \
-  picture->u0[dst] = rgb_to_u(r, g, b);                  \
-  picture->v0[dst] = rgb_to_v(r, g, b);                  \
+  picture->u0[dst] = VP8RGBToU(r, g, b);                 \
+  picture->v0[dst] = VP8RGBToV(r, g, b);                 \
 }
 
 static void MakeGray(WebPPicture* const picture) {
@@ -633,7 +607,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
     for (x = 0; x < width; ++x) {
       const int offset = step * x + y * rgb_stride;
       picture->y[x + y * picture->y_stride] =
-          rgb_to_y(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+          VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
     }
   }
 
@@ -643,7 +617,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
       for (x = 0; x < (width >> 1); ++x) {
         RGB_TO_UV(x, y, SUM4);
       }
-      if (picture->width & 1) {
+      if (width & 1) {
         RGB_TO_UV(x, y, SUM2V);
       }
     }

From 3b02309347b38cdbc2e691ad2b53f634f714b9ea Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 2 Aug 2012 17:14:51 -0700
Subject: [PATCH 25/42] silence one more warning

inadvertently added in last warning roundup

Change-Id: I38e6bcfb18c133f2dc2b38cec81e12d2ff556011
---
 src/enc/alpha.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index 2b36733c..0e519b6c 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -234,8 +234,10 @@ static int EncodeAlpha(VP8Encoder* const enc,
 
     // Try the other mode(s).
     {
-      WebPAuxStats best_stats = { 0 };
+      WebPAuxStats best_stats;
       size_t best_score = VP8BitWriterSize(&bw);
+
+      memset(&best_stats, 0, sizeof(best_stats));  // prevent spurious warning
       if (pic->stats != NULL) best_stats = *pic->stats;
       for (test_filter = WEBP_FILTER_HORIZONTAL;
            ok && (test_filter <= WEBP_FILTER_GRADIENT);

From a06f802325cf81f46f57a14d5e5ce91ecd33aea1 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 3 Aug 2012 10:43:51 -0700
Subject: [PATCH 26/42] MODE_YUVA: set alpha to opaque if the image has none

this change avoids returning uninitialized alpha values when decoding a
lossy image that has no alpha channel to YUVA

Change-Id: I1e02459ac28b36f1f2b422063d057a5faba2f8f2
---
 src/dec/io.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/dec/io.c b/src/dec/io.c
index 8a9ee4e7..2c75f782 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -162,17 +162,24 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
 
 static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* alpha = io->a;
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
+  int j;
+
   if (alpha != NULL) {
-    int j;
-    const int mb_w = io->mb_w;
-    const int mb_h = io->mb_h;
-    const WebPYUVABuffer* const buf = &p->output->u.YUVA;
-    uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
     for (j = 0; j < mb_h; ++j) {
       memcpy(dst, alpha, mb_w * sizeof(*dst));
       alpha += io->width;
       dst += buf->a_stride;
     }
+  } else if (buf->a != NULL) {
+    // the user requested alpha, but there is none, set it to opaque.
+    for (j = 0; j < mb_h; ++j) {
+      memset(dst, 0xff, mb_w * sizeof(*dst));
+      dst += buf->a_stride;
+    }
   }
   return 0;
 }

From c40d7ef125cfad6a386a75f19dcdb13b328a3d06 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Fri, 3 Aug 2012 14:44:35 -0700
Subject: [PATCH 27/42] fix alpha-plane check + add extra checks

Change-Id: I9d8c9743f9d4f3d394544773ed2d0c31a9acf24d
---
 src/dec/buffer.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/dec/buffer.c b/src/dec/buffer.c
index e491d4c4..c159f6f2 100644
--- a/src/dec/buffer.c
+++ b/src/dec/buffer.c
@@ -51,18 +51,23 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
     ok &= (y_size <= buf->y_size);
     ok &= (u_size <= buf->u_size);
     ok &= (v_size <= buf->v_size);
-    ok &= (a_size <= buf->a_size);
     ok &= (buf->y_stride >= width);
     ok &= (buf->u_stride >= (width + 1) / 2);
     ok &= (buf->v_stride >= (width + 1) / 2);
-    if (buf->a) {
+    ok &= (buf->y != NULL);
+    ok &= (buf->u != NULL);
+    ok &= (buf->v != NULL);
+    if (mode == MODE_YUVA) {
       ok &= (buf->a_stride >= width);
+      ok &= (a_size <= buf->a_size);
+      ok &= (buf->a != NULL);
     }
   } else {    // RGB checks
     const WebPRGBABuffer* const buf = &buffer->u.RGBA;
     const uint64_t size = (uint64_t)buf->stride * height;
     ok &= (size <= buf->size);
     ok &= (buf->stride >= width * kModeBpp[mode]);
+    ok &= (buf->rgba != NULL);
   }
   return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
 }

From 33705ca09334e87f8da8b029b787ae583a470181 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Fri, 3 Aug 2012 06:48:11 +0000
Subject: [PATCH 28/42] bump version to 0.2.0

Change-Id: I01cb50b9c4c8e9245aede3947481cbbd27d6a19d
---
 README            | 2 +-
 configure.ac      | 2 +-
 src/Makefile.am   | 2 +-
 src/dec/vp8i.h    | 4 ++--
 src/enc/vp8enci.h | 4 ++--
 src/webp/decode.h | 2 +-
 src/webp/encode.h | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/README b/README
index 1882b134..8b904180 100644
--- a/README
+++ b/README
@@ -4,7 +4,7 @@
           \__\__/\____/\_____/__/ ____  ___
                 / _/ /    \    \ /  _ \/ _/
                /  \_/   / /   \ \   __/  \__
-               \____/____/\_____/_____/____/v0.1.99
+               \____/____/\_____/_____/____/v0.2.0
 
 Description:
 ============
diff --git a/configure.ac b/configure.ac
index 932b4f30..d81c4c98 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([libwebp], [0.1.99],
+AC_INIT([libwebp], [0.2.0],
         [http://code.google.com/p/webp/issues],,
         [http://developers.google.com/speed/webp])
 AC_CANONICAL_TARGET
diff --git a/src/Makefile.am b/src/Makefile.am
index ab55b676..69a09ef1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,7 +20,7 @@ libwebp_la_LIBADD += dsp/libwebpdsp.la
 libwebp_la_LIBADD += enc/libwebpencode.la
 libwebp_la_LIBADD += utils/libwebputils.la
 
-libwebp_la_LDFLAGS = -version-info 3:0:0
+libwebp_la_LDFLAGS = -version-info 4:0:0
 libwebpincludedir = $(includedir)/webp
 
 pkgconfig_DATA = libwebp.pc
diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h
index 9406b754..4382edfd 100644
--- a/src/dec/vp8i.h
+++ b/src/dec/vp8i.h
@@ -27,8 +27,8 @@ extern "C" {
 
 // version numbers
 #define DEC_MAJ_VERSION 0
-#define DEC_MIN_VERSION 1
-#define DEC_REV_VERSION 99
+#define DEC_MIN_VERSION 2
+#define DEC_REV_VERSION 0
 
 #define ONLY_KEYFRAME_CODE      // to remove any code related to P-Frames
 
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index 56f2f11b..a77778c0 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -26,8 +26,8 @@ extern "C" {
 
 // version numbers
 #define ENC_MAJ_VERSION 0
-#define ENC_MIN_VERSION 1
-#define ENC_REV_VERSION 99
+#define ENC_MIN_VERSION 2
+#define ENC_REV_VERSION 0
 
 // size of histogram used by CollectHistogram.
 #define MAX_COEFF_THRESH   64
diff --git a/src/webp/decode.h b/src/webp/decode.h
index 3c007c52..9df8f89e 100644
--- a/src/webp/decode.h
+++ b/src/webp/decode.h
@@ -18,7 +18,7 @@
 extern "C" {
 #endif
 
-#define WEBP_DECODER_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)
+#define WEBP_DECODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
 
 // Return the decoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.
diff --git a/src/webp/encode.h b/src/webp/encode.h
index dfaf0bbf..2e37cfab 100644
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@@ -18,7 +18,7 @@
 extern "C" {
 #endif
 
-#define WEBP_ENCODER_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)
+#define WEBP_ENCODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
 
 // Return the encoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.

From 43b0d6107aef709b1db362d3641691b36e29a49a Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Fri, 3 Aug 2012 22:15:34 +0000
Subject: [PATCH 29/42] add support for ARGB -> YUVA conversion for lossless
 decoder

This was returning a (hard-to-explain) error before.
(through WebPDecodeYUV() for instance).

+ rationalize the incremental API:
-> add WebPINewYUVA
-> deprecate WebPINewYUV
-> add WebPIDecGetYUVA
-> deprecate WebPIDecGetYUV

+ some NULL cosmetics

Change-Id: I39a6bd6018a34294d898b29f6c40e2cf76f1037e
---
 README            |   4 +-
 src/dec/idec.c    |  74 ++++++++++++---------
 src/dec/vp8l.c    | 163 +++++++++++++++++++++++++++++++++++++++-------
 src/dec/vp8li.h   |   2 +
 src/webp/decode.h |  35 +++++++---
 5 files changed, 214 insertions(+), 64 deletions(-)

diff --git a/README b/README
index 8b904180..a90fda0f 100644
--- a/README
+++ b/README
@@ -405,12 +405,12 @@ The 'idec' object must always be released (even upon an error condition) by
 calling: WebPDelete(idec).
 
 To retrieve partially decoded picture samples, one must use the corresponding
-method: WebPIDecGetRGB or WebPIDecGetYUV.
+method: WebPIDecGetRGB or WebPIDecGetYUVA.
 It will return the last displayable pixel row.
 
 Lastly, note that decoding can also be performed into a pre-allocated pixel
 buffer. This buffer must be passed when creating a WebPIDecoder, calling
-WebPINewRGB() or WebPINewYUV().
+WebPINewRGB() or WebPINewYUVA().
 
 Please have a look at the src/webp/decode.h header for further details.
 
diff --git a/src/dec/idec.c b/src/dec/idec.c
index 7205991c..7df790ce 100644
--- a/src/dec/idec.c
+++ b/src/dec/idec.c
@@ -567,7 +567,7 @@ WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
   }
   // Create an instance of the incremental decoder
   idec = WebPINewDecoder(config ? &config->output : NULL);
-  if (!idec) {
+  if (idec == NULL) {
     return NULL;
   }
   // Finish initialization
@@ -599,7 +599,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
   WebPIDecoder* idec;
   if (mode >= MODE_YUV) return NULL;
   idec = WebPINewDecoder(NULL);
-  if (!idec) return NULL;
+  if (idec == NULL) return NULL;
   idec->output_.colorspace = mode;
   idec->output_.is_external_memory = 1;
   idec->output_.u.RGBA.rgba = output_buffer;
@@ -608,12 +608,13 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
   return idec;
 }
 
-WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
-                          uint8_t* u, size_t u_size, int u_stride,
-                          uint8_t* v, size_t v_size, int v_stride) {
+WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
+                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* v, size_t v_size, int v_stride,
+                           uint8_t* a, size_t a_size, int a_stride) {
   WebPIDecoder* const idec = WebPINewDecoder(NULL);
-  if (!idec) return NULL;
-  idec->output_.colorspace = MODE_YUV;
+  if (idec == NULL) return NULL;
+  idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
   idec->output_.is_external_memory = 1;
   idec->output_.u.YUVA.y = luma;
   idec->output_.u.YUVA.y_stride = luma_stride;
@@ -624,9 +625,21 @@ WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
   idec->output_.u.YUVA.v = v;
   idec->output_.u.YUVA.v_stride = v_stride;
   idec->output_.u.YUVA.v_size = v_size;
+  idec->output_.u.YUVA.a = a;
+  idec->output_.u.YUVA.a_stride = a_stride;
+  idec->output_.u.YUVA.a_size = a_size;
   return idec;
 }
 
+WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
+                          uint8_t* u, size_t u_size, int u_stride,
+                          uint8_t* v, size_t v_size, int v_stride) {
+  return WebPINewYUVA(luma, luma_size, luma_stride,
+                      u, u_size, u_stride,
+                      v, v_size, v_stride,
+                      NULL, 0, 0);
+}
+
 //------------------------------------------------------------------------------
 
 static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
@@ -698,15 +711,15 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
                                       int* left, int* top,
                                       int* width, int* height) {
   const WebPDecBuffer* const src = GetOutputBuffer(idec);
-  if (left) *left = 0;
-  if (top) *top = 0;
+  if (left != NULL) *left = 0;
+  if (top != NULL) *top = 0;
   // TODO(skal): later include handling of rotations.
   if (src) {
-    if (width) *width = src->width;
-    if (height) *height = idec->params_.last_y;
+    if (width != NULL) *width = src->width;
+    if (height != NULL) *height = idec->params_.last_y;
   } else {
-    if (width) *width = 0;
-    if (height) *height = 0;
+    if (width != NULL) *width = 0;
+    if (height != NULL) *height = 0;
   }
   return src;
 }
@@ -714,35 +727,38 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
 uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
                         int* width, int* height, int* stride) {
   const WebPDecBuffer* const src = GetOutputBuffer(idec);
-  if (!src) return NULL;
+  if (src == NULL) return NULL;
   if (src->colorspace >= MODE_YUV) {
     return NULL;
   }
 
-  if (last_y) *last_y = idec->params_.last_y;
-  if (width) *width = src->width;
-  if (height) *height = src->height;
-  if (stride) *stride = src->u.RGBA.stride;
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.RGBA.stride;
 
   return src->u.RGBA.rgba;
 }
 
-uint8_t* WebPIDecGetYUV(const WebPIDecoder* idec, int* last_y,
-                        uint8_t** u, uint8_t** v,
-                        int* width, int* height, int *stride, int* uv_stride) {
+uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
+                         uint8_t** u, uint8_t** v, uint8_t** a,
+                         int* width, int* height,
+                         int* stride, int* uv_stride, int* a_stride) {
   const WebPDecBuffer* const src = GetOutputBuffer(idec);
-  if (!src) return NULL;
+  if (src == NULL) return NULL;
   if (src->colorspace < MODE_YUV) {
     return NULL;
   }
 
-  if (last_y) *last_y = idec->params_.last_y;
-  if (u) *u = src->u.YUVA.u;
-  if (v) *v = src->u.YUVA.v;
-  if (width) *width = src->width;
-  if (height) *height = src->height;
-  if (stride) *stride = src->u.YUVA.y_stride;
-  if (uv_stride) *uv_stride = src->u.YUVA.u_stride;
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (u != NULL) *u = src->u.YUVA.u;
+  if (v != NULL) *v = src->u.YUVA.v;
+  if (a != NULL) *a = src->u.YUVA.a;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.YUVA.y_stride;
+  if (uv_stride != NULL) *uv_stride = src->u.YUVA.u_stride;
+  if (a_stride != NULL) *a_stride = src->u.YUVA.a_stride;
 
   return src->u.YUVA.y;
 }
diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 398faf91..70edbeb6 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -14,6 +14,7 @@
 #include <stdlib.h>
 #include "./vp8li.h"
 #include "../dsp/lossless.h"
+#include "../dsp/yuv.h"
 #include "../utils/huffman.h"
 #include "../utils/utils.h"
 
@@ -404,10 +405,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
   return 1;
 }
 
+//------------------------------------------------------------------------------
+// Export to ARGB
+
 // We have special "export" function since we need to convert from BGRA
-static int Export(VP8LDecoder* const dec, WEBP_CSP_MODE colorspace,
+static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
                   int rgba_stride, uint8_t* const rgba) {
-  WebPRescaler* const rescaler = dec->rescaler;
   const uint32_t* const src = (const uint32_t*)rescaler->dst;
   const int dst_width = rescaler->dst_width;
   int num_lines_out = 0;
@@ -421,18 +424,19 @@ static int Export(VP8LDecoder* const dec, WEBP_CSP_MODE colorspace,
 }
 
 // Emit scaled rows.
-static int EmitRescaledRows(VP8LDecoder* const dec, WEBP_CSP_MODE colorspace,
+static int EmitRescaledRows(const VP8LDecoder* const dec,
                             const uint32_t* const data, int in_stride, int mb_h,
                             uint8_t* const out, int out_stride) {
+  const WEBP_CSP_MODE colorspace = dec->output_->colorspace;
   const uint8_t* const in = (const uint8_t*)data;
   int num_lines_in = 0;
   int num_lines_out = 0;
   while (num_lines_in < mb_h) {
-    const uint8_t* row_in = in + num_lines_in * in_stride;
+    const uint8_t* const row_in = in + num_lines_in * in_stride;
     uint8_t* const row_out = out + num_lines_out * out_stride;
     num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
                                        row_in, in_stride);
-    num_lines_out += Export(dec, colorspace, out_stride, row_out);
+    num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out);
   }
   return num_lines_out;
 }
@@ -453,6 +457,113 @@ static int EmitRows(WEBP_CSP_MODE colorspace,
   return mb_h;  // Num rows out == num rows in.
 }
 
+//------------------------------------------------------------------------------
+// Export to YUVA
+
+static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos,
+                          const WebPDecBuffer* const output) {
+  const WebPYUVABuffer* const buf = &output->u.YUVA;
+  // first, the luma plane
+  {
+    int i;
+    uint8_t* const y = buf->y + y_pos * buf->y_stride;
+    for (i = 0; i < width; ++i) {
+      const uint32_t p = src[i];
+      y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff);
+    }
+  }
+
+  // then U/V planes
+  {
+    uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride;
+    uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride;
+    const int uv_width = width >> 1;
+    int i;
+    for (i = 0; i < uv_width; ++i) {
+      const uint32_t v0 = src[2 * i + 0];
+      const uint32_t v1 = src[2 * i + 1];
+      // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+      // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
+      const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+      const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
+      const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
+      if (!(y_pos & 1)) {  // even lines: store values
+        u[i] = VP8RGBToU(r, g, b);
+        v[i] = VP8RGBToV(r, g, b);
+      } else {             // odd lines: average with previous values
+        const int tmp_u = VP8RGBToU(r, g, b);
+        const int tmp_v = VP8RGBToV(r, g, b);
+        // Approximated average-of-four. But it's an acceptable diff.
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+    if (width & 1) {       // last pixel
+      const uint32_t v0 = src[2 * i + 0];
+      const int r = (v0 >> 14) & 0x3fc;
+      const int g = (v0 >>  6) & 0x3fc;
+      const int b = (v0 <<  2) & 0x3fc;
+      if (!(y_pos & 1)) {  // even lines
+        u[i] = VP8RGBToU(r, g, b);
+        v[i] = VP8RGBToV(r, g, b);
+      } else {             // odd lines (note: we could just skip this)
+        const int tmp_u = VP8RGBToU(r, g, b);
+        const int tmp_v = VP8RGBToV(r, g, b);
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+  }
+  // Lastly, store alpha if needed.
+  if (buf->a != NULL) {
+    int i;
+    uint8_t* const a = buf->a + y_pos * buf->a_stride;
+    for (i = 0; i < width; ++i) a[i] = (src[i] >> 24);
+  }
+}
+
+static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
+  WebPRescaler* const rescaler = dec->rescaler;
+  const uint32_t* const src = (const uint32_t*)rescaler->dst;
+  const int dst_width = rescaler->dst_width;
+  int num_lines_out = 0;
+  while (WebPRescalerHasPendingOutput(rescaler)) {
+    WebPRescalerExportRow(rescaler);
+    ConvertToYUVA(src, dst_width, y_pos, dec->output_);
+    ++y_pos;
+    ++num_lines_out;
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
+                                const uint32_t* const data,
+                                int in_stride, int mb_h) {
+  const uint8_t* const in = (const uint8_t*)data;
+  int num_lines_in = 0;
+  int y_pos = dec->last_out_row_;
+  while (num_lines_in < mb_h) {
+    const uint8_t* const row_in = in + num_lines_in * in_stride;
+    num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
+                                       row_in, in_stride);
+    y_pos += ExportYUVA(dec, y_pos);
+  }
+  return y_pos;
+}
+
+static int EmitRowsYUVA(const VP8LDecoder* const dec,
+                        const uint32_t* const data, int in_stride,
+                        int mb_w, int num_rows) {
+  int y_pos = dec->last_out_row_;
+  const uint8_t* row_in = (const uint8_t*)data;
+  while (num_rows-- > 0) {
+    ConvertToYUVA((const uint32_t*)row_in, mb_w, y_pos, dec->output_);
+    row_in += in_stride;
+    ++y_pos;
+  }
+  return y_pos;
+}
+
 //------------------------------------------------------------------------------
 // Cropping.
 
@@ -537,19 +648,23 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
     if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) {
       // Nothing to output (this time).
     } else {
-      WebPDecParams* const params = (WebPDecParams*)io->opaque;
-      const WebPDecBuffer* const output = params->output;
-      const WebPRGBABuffer* const buf = &output->u.RGBA;
-      uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+      const WebPDecBuffer* const output = dec->output_;
       const int in_stride = io->width * sizeof(*rows_data);
-      const WEBP_CSP_MODE colorspace = output->colorspace;
-      const int num_rows_out = io->use_scaling ?
-          EmitRescaledRows(dec, colorspace, rows_data, in_stride, io->mb_h,
-                           rgba, buf->stride) :
-          EmitRows(colorspace, rows_data, in_stride, io->mb_w, io->mb_h,
-                   rgba, buf->stride);
-      // Update 'last_out_row_'.
-      dec->last_out_row_ += num_rows_out;
+      if (output->colorspace < MODE_YUV) {  // convert to RGBA
+        const WebPRGBABuffer* const buf = &output->u.RGBA;
+        uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+        const int num_rows_out = io->use_scaling ?
+            EmitRescaledRows(dec, rows_data, in_stride, io->mb_h,
+                             rgba, buf->stride) :
+            EmitRows(output->colorspace, rows_data, in_stride,
+                     io->mb_w, io->mb_h, rgba, buf->stride);
+        // Update 'last_out_row_'.
+        dec->last_out_row_ += num_rows_out;
+      } else {                              // convert to YUVA
+        dec->last_out_row_ = io->use_scaling ?
+            EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
+            EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+      }
       assert(dec->last_out_row_ <= output->height);
     }
   }
@@ -818,6 +933,8 @@ void VP8LClear(VP8LDecoder* const dec) {
 
   free(dec->rescaler_memory);
   dec->rescaler_memory = NULL;
+
+  dec->output_ = NULL;   // leave no trace behind
 }
 
 void VP8LDelete(VP8LDecoder* const dec) {
@@ -950,7 +1067,7 @@ static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
   assert(dec->width_ <= final_width);
   dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
   if (dec->argb_ == NULL) {
-    dec->argb_cache_ = NULL;
+    dec->argb_cache_ = NULL;    // for sanity check
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
   }
@@ -1052,20 +1169,16 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
 int VP8LDecodeImage(VP8LDecoder* const dec) {
   VP8Io* io = NULL;
   WebPDecParams* params = NULL;
-  WebPDecBuffer* output = NULL;
 
   // Sanity checks.
   if (dec == NULL) return 0;
 
   io = dec->io_;
+  assert(io != NULL);
   params = (WebPDecParams*)io->opaque;
   assert(params != NULL);
-  output = params->output;
-  // YUV modes are invalid.
-  if (output->colorspace >= MODE_YUV) {
-    dec->status_ = VP8_STATUS_INVALID_PARAM;
-    goto Err;
-  }
+  dec->output_ = params->output;
+  assert(dec->output_ != NULL);
 
   // Initialization.
   if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
diff --git a/src/dec/vp8li.h b/src/dec/vp8li.h
index 542dbb71..ee29eb5f 100644
--- a/src/dec/vp8li.h
+++ b/src/dec/vp8li.h
@@ -61,6 +61,8 @@ typedef struct {
   VP8LDecodeState  state_;
   VP8Io           *io_;
 
+  const WebPDecBuffer *output_;    // shortcut to io->opaque->output
+
   uint32_t        *argb_;          // Internal data: always in BGRA color mode.
   uint32_t        *argb_cache_;    // Scratch buffer for temporary BGRA storage.
 
diff --git a/src/webp/decode.h b/src/webp/decode.h
index 9df8f89e..43b6c58f 100644
--- a/src/webp/decode.h
+++ b/src/webp/decode.h
@@ -233,7 +233,7 @@ typedef enum {
 //
 //     // The above call decodes the current available buffer.
 //     // Part of the image can now be refreshed by calling to
-//     // WebPIDecGetRGB()/WebPIDecGetYUV() etc.
+//     // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
 //   }
 //   WebPIDelete(idec);
 
@@ -260,9 +260,18 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
 // will output the raw luma/chroma samples into a preallocated planes. The luma
 // plane is specified by its pointer 'luma', its size 'luma_size' and its stride
 // 'luma_stride'. Similarly, the chroma-u plane is specified by the 'u',
-// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v', 'v_size'
-// and 'v_size'.
+// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v',
+// 'v_size' and 'v_stride'. The same goes for the alpha plane. The 'a' pointer
+// can be passed as NULL if one is not interested in the transparency plane.
 // Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride,
+    uint8_t* a, size_t a_size, int a_stride);
+
+// Deprecated version of the above, without the alpha plane.
+// Kept for backward compatibility.
 WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
     uint8_t* luma, size_t luma_size, int luma_stride,
     uint8_t* u, size_t u_size, int u_stride,
@@ -296,12 +305,22 @@ WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
     const WebPIDecoder* idec, int* last_y,
     int* width, int* height, int* stride);
 
-// Same as above function to get YUV image. Returns pointer to the luma plane
-// or NULL in case of error.
-WEBP_EXTERN(uint8_t*) WebPIDecGetYUV(
+// Same as the above function, but for a YUVA image. Returns a pointer to the
+// luma plane, or NULL in case of error. If there is no alpha information,
+// the alpha pointer '*a' will be set to NULL.
+WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
     const WebPIDecoder* idec, int* last_y,
-    uint8_t** u, uint8_t** v,
-    int* width, int* height, int* stride, int* uv_stride);
+    uint8_t** u, uint8_t** v, uint8_t** a,
+    int* width, int* height, int* stride, int* uv_stride, int* a_stride);
+
+// Deprecated alpha-less version of WebPIDecGetYUVA(): it will ignore the
+// alpha information (if present). Kept for backward compatibility.
+static WEBP_INLINE uint8_t* WebPIDecGetYUV(
+    const WebPIDecoder* idec, int* last_y, uint8_t** u, uint8_t** v,
+    int* width, int* height, int* stride, int* uv_stride) {
+  return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height,
+                         stride, uv_stride, NULL);
+}
 
 // Generic call to retrieve information about the displayable area.
 // If non NULL, the left/right/width/height pointers are filled with the visible

From 014a711d969265c163c5fe99e878b0f5024894df Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 3 Aug 2012 16:09:59 -0700
Subject: [PATCH 30/42] update NEWS

changes since v0.1.99

Change-Id: Iaab1545516ef8df9f9dd6b4bc9cbf07539cb454f
---
 NEWS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/NEWS b/NEWS
index b155a6a7..1431c705 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+- 8/3/12: version 0.2.0
+  * Add support for ARGB -> YUVA conversion for lossless decoder
+    New functions: WebPINewYUVA, WebPIDecGetYUVA
+  * Add stats for lossless and alpha encoding
+  * Security related hardening: allocation and size checks
+  * Add PAM output support to dwebp
+
 - 7/19/12: version 0.1.99
   * This is a pre-release of 0.2.0, not an rc to allow for further
     incompatible changes based on user feedback.

From 5934fc59db48dff2e1b6c62ee6b94d39c04dcad9 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 3 Aug 2012 16:15:38 -0700
Subject: [PATCH 31/42] update AUTHORS

Change-Id: I205422ac3be5e363adfc84dcf84f6d5d84b9a40f
---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 0c3677ef..c6ea612b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,6 +2,7 @@ Contributors:
 - James Zern (jzern at google dot com)
 - Jan Engelhardt (jengelh at medozas dot de)
 - Johann (johannkoenig at google dot com)
+- Jyrki Alakuijala (jyrki at google dot com)
 - Lou Quillio (louquillio at google dot com)
 - Martin Olsson (mnemo at minimum dot se)
 - Mikołaj Zalewski (mikolajz at google dot com)

From 93bf0faafae6a7acab950498a3f395bb09b931fc Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 3 Aug 2012 16:21:12 -0700
Subject: [PATCH 32/42] Update ChangeLog

Change-Id: I5ff337065b8a6f8952dc77c3f9c7798267ee6727
---
 ChangeLog | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 1b397780..dba62f50 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+5934fc5 update AUTHORS
+014a711 update NEWS
+43b0d61 add support for ARGB -> YUVA conversion for lossless decoder
+33705ca bump version to 0.2.0
+c40d7ef fix alpha-plane check + add extra checks
+a06f802 MODE_YUVA: set alpha to opaque if the image has none
+52a87dd Merge "silence one more warning" into 0.2.0
+3b02309 silence one more warning
+f94b04f move some RGB->YUV functions to yuv.h
+4b71ba0 README: sync [cd]webp help output
+c9ae57f man/dwebp.1: add links to output file format details
+292ec5c quiet a few 'uninitialized' warnings
+4af3f6c fix indentation
+9b261bf remove the last NOT_HAVE_LOG2 instances
+323dc4d remove use of log2(). Use VP8LFastLog2() instead.
+8c515d5 Merge "harness some malloc/calloc to use WebPSafeMalloc and WebPSafeCalloc" into 0.2.0
+d4b4bb0 Merge changes I46090628,I1a41b2ce into 0.2.0
+bff34ac harness some malloc/calloc to use WebPSafeMalloc and WebPSafeCalloc
+a3c063c Merge "extra size check for security" into 0.2.0
+5e79630 Merge "WebPEncode: clear stats at the start of encode" into 0.2.0
+f1edf62 Merge "rationalize use of color-cache" into 0.2.0
+c193331 extra size check for security
+906be65 rationalize use of color-cache
+dd1c387 Add image-hint for low-color images.
+4eb7aa6 Merge "WebPCheckMalloc() and WebPCheckCalloc():" into 0.2.0
+80cc730 WebPCheckMalloc() and WebPCheckCalloc():
+183cba8 check VP8LBitWriterInit return
+cbfa9ee lossless: fix crash on user abort
+256afef cwebp: exit immediately on version mismatch
+475d87d WebPEncode: clear stats at the start of encode
+a7cc729 fix type and conversion warnings
+7d853d7 add stats for lossless
+d39177b make QuantizeLevels() store the sum of squared error
+5955cf5 replace x*155/100 by x*101581>>16
+7d732f9 make QuantizeLevels() store the sum of squared error
+e45a446 replace x*155/100 by x*101581>>16
+159b75d cwebp output size consistency:
+cbee59e Merge commit 'v0.1.99'
+1889e9b dwebp: report -alpha option
+3bc3f7c Merge "dwebp: add PAM output support" into 0.2.0
+d919ed0 dwebp: add PAM output support
+85e215d README/manpages/configure: update website link
+c3a207b Update ChangeLog (v0.1.99)
 d1fd782 Merge "add extra precision about default values and behaviour" into 0.2.0
 efc826e add extra precision about default values and behaviour
 9f29635 header/doc clean up
@@ -14,6 +57,7 @@ c37c23e README: cosmetics
 ce90847 Merge "add some padding bytes areas for later use" into 0.2.0
 2390dab Merge "fixing the findings by Frederic Kayser to the bitstream spec" into 0.2.0
 0275159 add a very crude progress report for lossless
+a4b9b1c Remove some unused enum values.
 dd10817 rename 'use_argb_input' to 'use_argb'
 90516ae add some padding bytes areas for later use
 d03b250 fixing the findings by Frederic Kayser to the bitstream spec
@@ -46,6 +90,7 @@ c3b014d Android.mk: add missing lossless files
 8c1cc6b makefile.unix dist: explicitly name installed includes
 7f4647e Merge "clarify the colorspace naming and byte ordering of decoded samples" into 0.2.0
 cbf6972 clarify the colorspace naming and byte ordering of decoded samples
+857650c Mux: Add WebPDataInit() and remove WebPImageInfo
 ff771e7 don't install webp/decode_vp8.h
 596dff7 VP8LFillBitWindow: use 64-bit path for msvc x64 builds
 3ca7ce9 Merge "doc: remove non-finalized chunk references" into 0.2.0

From 6f4272b090223dc3f47414cb41ac86356a3b9a2e Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Tue, 7 Aug 2012 22:41:25 -0700
Subject: [PATCH 33/42] remove unused ApplyInverseTransform()

transforms are only allowed for is_level0

Change-Id: Iec8ce8bdbe024aae6cae2688e2934ab8f445000c
---
 src/dec/vp8l.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 70edbeb6..39fa4d10 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -797,19 +797,6 @@ static void ClearTransform(VP8LTransform* const transform) {
   transform->data_ = NULL;
 }
 
-static void ApplyInverseTransforms(VP8LDecoder* const dec, int start_idx,
-                                   uint32_t* const decoded_data) {
-  int n = dec->next_transform_;
-  assert(start_idx >= 0);
-  while (n-- > start_idx) {
-    VP8LTransform* const transform = &dec->transforms_[n];
-    VP8LInverseTransform(transform, 0, transform->ysize_,
-                         decoded_data, decoded_data);
-    ClearTransform(transform);
-  }
-  dec->next_transform_ = start_idx;
-}
-
 // For security reason, we need to remap the color map to span
 // the total possible bundled values, and not just the num_colors.
 static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
@@ -964,7 +951,6 @@ static int DecodeImageStream(int xsize, int ysize,
   VP8LBitReader* const br = &dec->br_;
   VP8LMetadata* const hdr = &dec->hdr_;
   uint32_t* data = NULL;
-  const int transform_start_idx = dec->next_transform_;
   int color_cache_bits = 0;
 
   // Read the transforms (may recurse).
@@ -1024,9 +1010,6 @@ static int DecodeImageStream(int xsize, int ysize,
   ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL);
   ok = ok && !br->error_;
 
-  // Apply transforms on the decoded data.
-  if (ok) ApplyInverseTransforms(dec, transform_start_idx, data);
-
  End:
 
   if (!ok) {

From 62dd9bb242c800881af79000551f7b1dc4eb53fb Mon Sep 17 00:00:00 2001
From: Vikas Arora <vikasa@google.com>
Date: Wed, 8 Aug 2012 14:28:25 -0700
Subject: [PATCH 34/42] Update encoding heuristic w.r.t palette colors.

Added a threshold of MAX_COLORS_FOR_GRAPH for color-palettes, above
which the graph hint is ignored.

Change-Id: Ia5d7f45e52731b6eaf2806999d6be82861744fd3
---
 src/enc/vp8l.c | 51 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
index 0d0fe65e..9c202f8d 100644
--- a/src/enc/vp8l.c
+++ b/src/enc/vp8l.c
@@ -29,6 +29,7 @@ extern "C" {
 
 #define PALETTE_KEY_RIGHT_SHIFT   22  // Key for 1K buffer.
 #define MAX_HUFF_IMAGE_SIZE       (16 * 1024 * 1024)
+#define MAX_COLORS_FOR_GRAPH      64
 
 // -----------------------------------------------------------------------------
 // Palette
@@ -98,11 +99,11 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
   return 1;
 }
 
-static int AnalyzeEntropy(const WebPPicture* const pic,
+static int AnalyzeEntropy(const uint32_t* argb,
+                          int width, int height, int argb_stride,
                           double* const nonpredicted_bits,
                           double* const predicted_bits) {
   int x, y;
-  const uint32_t* argb = pic->argb;
   const uint32_t* last_line = NULL;
   uint32_t last_pix = argb[0];    // so we're sure that pix_diff == 0
 
@@ -114,8 +115,8 @@ static int AnalyzeEntropy(const WebPPicture* const pic,
 
   VP8LHistogramInit(predicted, 0);
   VP8LHistogramInit(nonpredicted, 0);
-  for (y = 0; y < pic->height; ++y) {
-    for (x = 0; x < pic->width; ++x) {
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
       const uint32_t pix = argb[x];
       const uint32_t pix_diff = VP8LSubPixels(pix, last_pix);
       if (pix_diff == 0) continue;
@@ -131,7 +132,7 @@ static int AnalyzeEntropy(const WebPPicture* const pic,
       }
     }
     last_line = argb;
-    argb += pic->argb_stride;
+    argb += argb_stride;
   }
   *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted);
   *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted);
@@ -143,24 +144,35 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
   const WebPPicture* const pic = enc->pic_;
   assert(pic != NULL && pic->argb != NULL);
 
-  enc->use_palette_ = (image_hint == WEBP_HINT_GRAPH) ? 0 :
+  enc->use_palette_ =
       AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
-  if (!enc->use_palette_) {
-    if (image_hint == WEBP_HINT_DEFAULT) {
-      double non_pred_entropy, pred_entropy;
-      if (!AnalyzeEntropy(pic, &non_pred_entropy, &pred_entropy)) {
-        return 0;
-      }
 
-      if (pred_entropy < 0.95 * non_pred_entropy) {
-        enc->use_predict_ = 1;
-        enc->use_cross_color_ = 1;
-      }
-    } else if (image_hint == WEBP_HINT_PHOTO) {
-      enc->use_predict_ = 1;
-      enc->use_cross_color_ = 1;
+  if (image_hint == WEBP_HINT_GRAPH) {
+    if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
+      enc->use_palette_ = 0;
     }
   }
+
+  if (!enc->use_palette_) {
+    if (image_hint == WEBP_HINT_PHOTO) {
+      enc->use_predict_ = 1;
+      enc->use_cross_color_ = 1;
+    } else {
+      double non_pred_entropy, pred_entropy;
+      if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride,
+                          &non_pred_entropy, &pred_entropy)) {
+        return 0;
+      }
+      if (pred_entropy < 0.95 * non_pred_entropy) {
+        enc->use_predict_ = 1;
+        // TODO(vikasa): Observed some correlation of cross_color transform with
+        // predict. Need to investigate this further and add separate heuristic
+        // for setting use_cross_color flag.
+        enc->use_cross_color_ = 1;
+      }
+    }
+  }
+
   return 1;
 }
 
@@ -961,6 +973,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
   if (enc->use_palette_) {
     err = ApplyPalette(bw, enc, quality);
     if (err != VP8_ENC_OK) goto Error;
+    // Color cache is disabled for palette.
     enc->cache_bits_ = 0;
   }
 

From a0a488554deb95c596611c498af9ac187bd61425 Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Thu, 9 Aug 2012 02:44:38 +0530
Subject: [PATCH 35/42] Lossless decoder fix for a special transform order

Fix the lossless decoder for the case when it has to apply other
inverse transforms before applying Color indexing inverse transform.

The main idea is to make ColorIndexingInverse virtually in-place: we
use the fact that the argb_cache is allocated to accommodate all
*unpacked* pixels of a macro-row, not just *packed* pixels.

Change-Id: I27f11f3043f863dfd753cc2580bc5b36376800c4
---
 src/dec/vp8l.c     | 14 ++++++++------
 src/dsp/lossless.c | 16 +++++++++++++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 39fa4d10..ab8d2e3b 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -615,20 +615,22 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
 
 typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
 
-static void ApplyTransforms(VP8LDecoder* const dec, int num_rows,
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
                             const uint32_t* const rows) {
   int n = dec->next_transform_;
   const int cache_pixs = dec->width_ * num_rows;
-  uint32_t* rows_data = dec->argb_cache_;
   const int start_row = dec->last_row_;
   const int end_row = start_row + num_rows;
+  const uint32_t* rows_in = rows;
+  uint32_t* const rows_out = dec->argb_cache_;
 
   // Inverse transforms.
   // TODO: most transforms only need to operate on the cropped region only.
-  memcpy(rows_data, rows, cache_pixs * sizeof(*rows_data));
+  memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
   while (n-- > 0) {
     VP8LTransform* const transform = &dec->transforms_[n];
-    VP8LInverseTransform(transform, start_row, end_row, rows, rows_data);
+    VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
+    rows_in = rows_out;
   }
 }
 
@@ -639,7 +641,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   const int num_rows = row - dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyTransforms(dec, num_rows, rows);
+  ApplyInverseTransforms(dec, num_rows, rows);
 
   // Emit output.
   {
@@ -1066,7 +1068,7 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
   const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyTransforms(dec, num_rows, in);
+  ApplyInverseTransforms(dec, num_rows, in);
 
   // Extract alpha (which is stored in the green plane).
   {
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 472e641e..00f8399a 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -935,7 +935,7 @@ static void ColorIndexInverseTransform(
       uint32_t packed_pixels = 0;
       int x;
       for (x = 0; x < width; ++x) {
-        // We need to load fresh 'packed_pixels' once every 'bytes_per_pixels'
+        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
         // increments of x. Fortunately, pixels_per_byte is a power of 2, so
         // can just use a mask for that, instead of decrementing a counter.
         if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
@@ -976,7 +976,21 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
       ColorSpaceInverseTransform(transform, row_start, row_end, out);
       break;
     case COLOR_INDEXING_TRANSFORM:
+      if (in == out && transform->bits_ > 0) {
+        // Move packed pixels to the end of unpacked region, so that unpacking
+        // can occur seamlessly.
+        // Also, note that this is the only transform that applies on
+        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+        // transforms work on effective width of xsize_.
+        const int out_stride = (row_end - row_start) * transform->xsize_;
+        const int in_stride = (row_end - row_start) *
+            VP8LSubSampleSize(transform->xsize_, transform->bits_);
+        uint32_t* const src = out + out_stride - in_stride;
+        memmove(src, out, in_stride * sizeof(*src));
+        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+      } else {
       ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+      }
       break;
   }
 }

From 528a11af35c7f68a641ea782a4dca18b3835b5b3 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Thu, 9 Aug 2012 11:32:30 -0700
Subject: [PATCH 36/42] fix the ARGB4444 premultiply arithmetic

* green was not descaled properly
* alpha was over-dithered, making the value '0x0f' not be a fixed point
* alpha value was not restored ok.

Change-Id: Ia4a4d75bdad41257f7c07ef76a487065ac36fede
---
 src/dsp/upsampling.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c
index 9ca04927..4855eb14 100644
--- a/src/dsp/upsampling.c
+++ b/src/dsp/upsampling.c
@@ -271,8 +271,7 @@ static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
 
 // rgbA4444
 
-#define MULTIPLIER(a)  ((a) * 0x11)
-#define PREMULTIPLY(x, m) (((x) * (m)) >> 12)
+#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
 
 static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
   return (x & 0xf0) | (x >> 4);
@@ -282,24 +281,27 @@ static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
   return (x & 0x0f) | (x << 4);
 }
 
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+  return (x * m) >> 16;
+}
+
 static void ApplyAlphaMultiply4444(uint8_t* rgba4444,
                                    int w, int h, int stride) {
   while (h-- > 0) {
     int i;
     for (i = 0; i < w; ++i) {
-      const uint8_t a = dither_lo(rgba4444[2 * i + 1]);
+      const uint8_t a = (rgba4444[2 * i + 1] & 0x0f);
       const uint32_t mult = MULTIPLIER(a);
-      const uint8_t r = PREMULTIPLY(dither_hi(rgba4444[2 * i + 0]), mult);
-      const uint8_t g = PREMULTIPLY(dither_lo(rgba4444[2 * i + 0]), mult);
-      const uint8_t b = PREMULTIPLY(dither_hi(rgba4444[2 * i + 1]), mult);
-      rgba4444[2 * i + 0] = (r & 0xf0) | (g & 0x0f);
+      const uint8_t r = multiply(dither_hi(rgba4444[2 * i + 0]), mult);
+      const uint8_t g = multiply(dither_lo(rgba4444[2 * i + 0]), mult);
+      const uint8_t b = multiply(dither_hi(rgba4444[2 * i + 1]), mult);
+      rgba4444[2 * i + 0] = (r & 0xf0) | ((g >> 4) & 0x0f);
       rgba4444[2 * i + 1] = (b & 0xf0) | a;
     }
     rgba4444 += stride;
   }
 }
 #undef MULTIPLIER
-#undef PREMULTIPLY
 
 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int)
     = ApplyAlphaMultiply;

From 6fe843baeb1f3d94fd5109997276e2c308625f73 Mon Sep 17 00:00:00 2001
From: Pascal Massimino <pascal.massimino@gmail.com>
Date: Thu, 9 Aug 2012 11:33:29 -0700
Subject: [PATCH 37/42] avoid rgb-premultiply if there's only trivial alpha
 values

With this, MODE_rgbA can safely be used without speed penalty
even in case of pure-lossy alpha-less input.
It's also an optimization when cropping a fully-opaque region from
an image with alpha: premultiply is then skipped

Change-Id: Ibee28c75744f193dacdfccd5a2e7cd1e44604db6
---
 src/dec/io.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/dec/io.c b/src/dec/io.c
index 2c75f782..b90f6c5e 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -196,6 +196,7 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
     int start_y = io->mb_y;
     int num_rows = mb_h;
+    uint32_t alpha_mask = 0xff;
 
     // We compensate for the 1-line delay of fancy upscaler.
     // This is similar to EmitFancyRGB().
@@ -219,11 +220,16 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
       uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
       uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
       for (j = 0; j < num_rows; ++j) {
-        for (i = 0; i < mb_w; ++i) dst[4 * i] = alpha[i];
+        for (i = 0; i < mb_w; ++i) {
+          const uint32_t alpha_value = alpha[i];
+          dst[4 * i] = alpha_value;
+          alpha_mask &= alpha_value;
+        }
         alpha += io->width;
         dst += buf->stride;
       }
-      if (WebPIsPremultipliedMode(colorspace)) {
+      // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
+      if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
         WebPApplyAlphaMultiply(base_rgba, alpha_first,
                                mb_w, num_rows, buf->stride);
       }
@@ -241,16 +247,18 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
     uint8_t* const base_rgba = buf->rgba + io->mb_y * buf->stride;
     uint8_t* alpha_dst = base_rgba + 1;
+    uint32_t alpha_mask = 0x0f;
     for (j = 0; j < mb_h; ++j) {
       for (i = 0; i < mb_w; ++i) {
         // Fill in the alpha value (converted to 4 bits).
-        const uint32_t alpha_val = VP8Clip4Bits(alpha[i]);
-        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val;
+        const uint32_t alpha_value = VP8Clip4Bits(alpha[i]);
+        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+        alpha_mask &= alpha_value;
       }
       alpha += io->width;
       alpha_dst += buf->stride;
     }
-    if (p->output->colorspace == MODE_rgbA_4444) {
+    if (alpha_mask != 0x0f && p->output->colorspace == MODE_rgbA_4444) {
       WebPApplyAlphaMultiply4444(base_rgba, mb_w, mb_h, buf->stride);
     }
   }
@@ -396,17 +404,22 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos) {
   uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
   int num_lines_out = 0;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0xff;
   const int width = p->scaler_a.dst_width;
 
   while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
     int i;
     assert(p->last_y + y_pos + num_lines_out < p->output->height);
     WebPRescalerExportRow(&p->scaler_a);
-    for (i = 0; i < width; ++i) dst[4 * i] = p->scaler_a.dst[i];
+    for (i = 0; i < width; ++i) {
+      const uint32_t alpha_value = p->scaler_a.dst[i];
+      dst[4 * i] = alpha_value;
+      alpha_mask &= alpha_value;
+    }
     dst += buf->stride;
     ++num_lines_out;
   }
-  if (is_premult_alpha) {
+  if (is_premult_alpha && alpha_mask != 0xff) {
     WebPApplyAlphaMultiply(base_rgba, alpha_first,
                            width, num_lines_out, buf->stride);
   }
@@ -421,6 +434,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
   const WEBP_CSP_MODE colorspace = p->output->colorspace;
   const int width = p->scaler_a.dst_width;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0x0f;
 
   while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
     int i;
@@ -428,13 +442,14 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
     WebPRescalerExportRow(&p->scaler_a);
     for (i = 0; i < width; ++i) {
       // Fill in the alpha value (converted to 4 bits).
-      const uint32_t alpha_val = VP8Clip4Bits(p->scaler_a.dst[i]);
-      alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val;
+      const uint32_t alpha_value = VP8Clip4Bits(p->scaler_a.dst[i]);
+      alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+      alpha_mask &= alpha_value;
     }
     alpha_dst += buf->stride;
     ++num_lines_out;
   }
-  if (is_premult_alpha) {
+  if (is_premult_alpha && alpha_mask != 0x0f) {
     WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
   }
   return num_lines_out;

From f56e98fd11e687175ccc7f52f13f28c7df5419de Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Thu, 9 Aug 2012 23:27:29 +0530
Subject: [PATCH 38/42] Alignment fix

Change-Id: Ia5475247f03456b01571ae7531da90f74c068045
---
 src/dec/vp8l.c     | 2 +-
 src/dsp/lossless.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index ab8d2e3b..897e4395 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -616,7 +616,7 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
 typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
 
 static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
-                            const uint32_t* const rows) {
+                                   const uint32_t* const rows) {
   int n = dec->next_transform_;
   const int cache_pixs = dec->width_ * num_rows;
   const int start_row = dec->last_row_;
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 00f8399a..62a6b7b1 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -989,7 +989,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
         memmove(src, out, in_stride * sizeof(*src));
         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
       } else {
-      ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
       }
       break;
   }

From 681cb30ad27985fe2dbcf7302fcc4d8b8a3ee1af Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Sat, 11 Aug 2012 20:08:21 -0700
Subject: [PATCH 39/42] fix RGBA4444 output w/fancy upsampling

compensates for the 1-line delay in the upscaler, outputting the correct
alpha row

Change-Id: Ia9a65bcb3cfa19e42185523cc6e706101a39d45d
---
 src/dec/io.c | 81 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/src/dec/io.c b/src/dec/io.c
index b90f6c5e..e318c919 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -184,38 +184,45 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
   return 0;
 }
 
+static int GetAlphaSourceRow(const VP8Io* const io,
+                             const uint8_t** alpha, int* const num_rows) {
+  int start_y = io->mb_y;
+  *num_rows = io->mb_h;
+
+  // Compensate for the 1-line delay of the fancy upscaler.
+  // This is similar to EmitFancyRGB().
+  if (io->fancy_upsampling) {
+    if (start_y == 0) {
+      // We don't process the last row yet. It'll be done during the next call.
+      --*num_rows;
+    } else {
+      --start_y;
+      // Fortunately, *alpha data is persistent, so we can go back
+      // one row and finish alpha blending, now that the fancy upscaler
+      // completed the YUV->RGB interpolation.
+      *alpha -= io->width;
+    }
+    if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
+      // If it's the very last call, we process all the remaing rows!
+      *num_rows = io->crop_bottom - io->crop_top - start_y;
+    }
+  }
+  return start_y;
+}
+
 static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
-    const int mb_h = io->mb_h;
     int i, j;
     const WEBP_CSP_MODE colorspace = p->output->colorspace;
     const int alpha_first =
         (colorspace == MODE_ARGB || colorspace == MODE_Argb);
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-    int start_y = io->mb_y;
-    int num_rows = mb_h;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint32_t alpha_mask = 0xff;
 
-    // We compensate for the 1-line delay of fancy upscaler.
-    // This is similar to EmitFancyRGB().
-    if (io->fancy_upsampling) {
-      if (start_y == 0) {
-        // We don't process the last row yet. It'll be done during next call.
-        --num_rows;
-      } else {
-        --start_y;
-        // Fortunately, *alpha data is persistent, so we can go back
-        // one row and finish alpha blending, now that the fancy upscaler
-        // completed the YUV->RGB interpolation.
-        alpha -= io->width;
-      }
-      if (io->crop_top + io->mb_y + mb_h == io->crop_bottom) {
-        // If it's the very last call, we process all the remaing rows!
-        num_rows = io->crop_bottom - io->crop_top - start_y;
-      }
-    }
     {
       uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
       uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
@@ -242,24 +249,28 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
-    const int mb_h = io->mb_h;
     int i, j;
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-    uint8_t* const base_rgba = buf->rgba + io->mb_y * buf->stride;
-    uint8_t* alpha_dst = base_rgba + 1;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint32_t alpha_mask = 0x0f;
-    for (j = 0; j < mb_h; ++j) {
-      for (i = 0; i < mb_w; ++i) {
-        // Fill in the alpha value (converted to 4 bits).
-        const uint32_t alpha_value = VP8Clip4Bits(alpha[i]);
-        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
-        alpha_mask &= alpha_value;
+
+    {
+      uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+      uint8_t* alpha_dst = base_rgba + 1;
+      for (j = 0; j < num_rows; ++j) {
+        for (i = 0; i < mb_w; ++i) {
+          // Fill in the alpha value (converted to 4 bits).
+          const uint32_t alpha_value = VP8Clip4Bits(alpha[i]);
+          alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+          alpha_mask &= alpha_value;
+        }
+        alpha += io->width;
+        alpha_dst += buf->stride;
+      }
+      if (alpha_mask != 0x0f && p->output->colorspace == MODE_rgbA_4444) {
+        WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
       }
-      alpha += io->width;
-      alpha_dst += buf->stride;
-    }
-    if (alpha_mask != 0x0f && p->output->colorspace == MODE_rgbA_4444) {
-      WebPApplyAlphaMultiply4444(base_rgba, mb_w, mb_h, buf->stride);
     }
   }
   return 0;

From fe1958f17dc3f6debc0de17c5983993040b8e71b Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Sat, 11 Aug 2012 20:14:33 -0700
Subject: [PATCH 40/42] RGBA4444: harmonize lossless/lossy alpha values

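lossy was rounding to nearest ((a + 8) >> 4, clamped to 15), which biased
the result toward opaque: alpha >= 232 mapped to 15 and alpha >= 8 mapped
to 1.
lossy now truncates (a >> 4), so both paths use the same range:
alpha >= 240 maps to 15 and alpha >= 16 maps to 1.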

Change-Id: I3da2063b4959b9e9f45bae09e640acc1f43470c5
---
 src/dec/io.c  | 4 ++--
 src/dsp/yuv.h | 5 -----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/dec/io.c b/src/dec/io.c
index e318c919..c5746f74 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -261,7 +261,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
       for (j = 0; j < num_rows; ++j) {
         for (i = 0; i < mb_w; ++i) {
           // Fill in the alpha value (converted to 4 bits).
-          const uint32_t alpha_value = VP8Clip4Bits(alpha[i]);
+          const uint32_t alpha_value = alpha[i] >> 4;
           alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
           alpha_mask &= alpha_value;
         }
@@ -453,7 +453,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
     WebPRescalerExportRow(&p->scaler_a);
     for (i = 0; i < width; ++i) {
       // Fill in the alpha value (converted to 4 bits).
-      const uint32_t alpha_value = VP8Clip4Bits(p->scaler_a.dst[i]);
+      const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
       alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
       alpha_mask &= alpha_value;
     }
diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h
index 22cb2596..a569109c 100644
--- a/src/dsp/yuv.h
+++ b/src/dsp/yuv.h
@@ -90,11 +90,6 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
   rgba[3] = 0xff;
 }
 
-static WEBP_INLINE uint32_t VP8Clip4Bits(uint8_t c) {
-  const uint32_t v = (c + 8) >> 4;
-  return (v > 15) ? 15 : v;
-}
-
 // Must be called before everything, to initialize the tables.
 void VP8YUVInit(void);
 

From c655380c36546f44d2ffdfc3936c694ad06e098a Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 14 Aug 2012 13:42:13 -0700
Subject: [PATCH 41/42] dec/io.c: cosmetics

- deindent EmitAlphaRGB*
- add some missing consts

Change-Id: I65f88da295e6a0afa383fadc2ef90a40613c2d62
---
 src/dec/io.c | 74 +++++++++++++++++++++++++---------------------------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/src/dec/io.c b/src/dec/io.c
index c5746f74..594804c2 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -111,7 +111,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* top_u = p->tmp_u;
   const uint8_t* top_v = p->tmp_v;
   int y = io->mb_y;
-  int y_end = io->mb_y + io->mb_h;
+  const int y_end = io->mb_y + io->mb_h;
   const int mb_w = io->mb_w;
   const int uv_w = (mb_w + 1) / 2;
 
@@ -150,7 +150,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
     // Process the very last row of even-sized picture
     if (!(y_end & 1)) {
       upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
-              dst + buf->stride, NULL, mb_w);
+               dst + buf->stride, NULL, mb_w);
     }
   }
   return num_lines_out;
@@ -203,7 +203,7 @@ static int GetAlphaSourceRow(const VP8Io* const io,
       *alpha -= io->width;
     }
     if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
-      // If it's the very last call, we process all the remaing rows!
+      // If it's the very last call, we process all the remaining rows!
       *num_rows = io->crop_bottom - io->crop_top - start_y;
     }
   }
@@ -214,32 +214,30 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
-    int i, j;
     const WEBP_CSP_MODE colorspace = p->output->colorspace;
     const int alpha_first =
         (colorspace == MODE_ARGB || colorspace == MODE_Argb);
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
     uint32_t alpha_mask = 0xff;
+    int i, j;
 
-    {
-      uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-      uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
-      for (j = 0; j < num_rows; ++j) {
-        for (i = 0; i < mb_w; ++i) {
-          const uint32_t alpha_value = alpha[i];
-          dst[4 * i] = alpha_value;
-          alpha_mask &= alpha_value;
-        }
-        alpha += io->width;
-        dst += buf->stride;
-      }
-      // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
-      if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
-        WebPApplyAlphaMultiply(base_rgba, alpha_first,
-                               mb_w, num_rows, buf->stride);
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        const uint32_t alpha_value = alpha[i];
+        dst[4 * i] = alpha_value;
+        alpha_mask &= alpha_value;
       }
+      alpha += io->width;
+      dst += buf->stride;
+    }
+    // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
+    if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
+      WebPApplyAlphaMultiply(base_rgba, alpha_first,
+                             mb_w, num_rows, buf->stride);
     }
   }
   return 0;
@@ -249,28 +247,27 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
-    int i, j;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
     const WebPRGBABuffer* const buf = &p->output->u.RGBA;
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    uint8_t* alpha_dst = base_rgba + 1;
     uint32_t alpha_mask = 0x0f;
+    int i, j;
 
-    {
-      uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-      uint8_t* alpha_dst = base_rgba + 1;
-      for (j = 0; j < num_rows; ++j) {
-        for (i = 0; i < mb_w; ++i) {
-          // Fill in the alpha value (converted to 4 bits).
-          const uint32_t alpha_value = alpha[i] >> 4;
-          alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
-          alpha_mask &= alpha_value;
-        }
-        alpha += io->width;
-        alpha_dst += buf->stride;
-      }
-      if (alpha_mask != 0x0f && p->output->colorspace == MODE_rgbA_4444) {
-        WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        // Fill in the alpha value (converted to 4 bits).
+        const uint32_t alpha_value = alpha[i] >> 4;
+        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+        alpha_mask &= alpha_value;
       }
+      alpha += io->width;
+      alpha_dst += buf->stride;
+    }
+    if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
+      WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
     }
   }
   return 0;
@@ -497,8 +494,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
     tmp_size1 += work_size;
     tmp_size2 += out_width;
   }
-  p->memory =
-      calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));
+  p->memory = calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));
   if (p->memory == NULL) {
     return 0;   // memory error
   }
@@ -595,7 +591,7 @@ static int CustomSetup(VP8Io* io) {
 //------------------------------------------------------------------------------
 
 static int CustomPut(const VP8Io* io) {
-  WebPDecParams* p = (WebPDecParams*)io->opaque;
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
   const int mb_w = io->mb_w;
   const int mb_h = io->mb_h;
   int num_lines_out;

From 4238bc0adbacf6d5021a6578e66332786ae6a5b6 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Wed, 15 Aug 2012 22:49:03 -0700
Subject: [PATCH 42/42] Update ChangeLog

Change-Id: I598aaf69c1a45a694c36f2f3166ed9adc20ace84
---
 ChangeLog | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index dba62f50..0dcf446b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+c655380 dec/io.c: cosmetics
+fe1958f RGBA4444: harmonize lossless/lossy alpha values
+681cb30 fix RGBA4444 output w/fancy upsampling
+f06c1d8 Merge "Alignment fix" into 0.2.0
+f56e98f Alignment fix
+6fe843b avoid rgb-premultiply if there's only trivial alpha values
+528a11a fix the ARGB4444 premultiply arithmetic
+a0a4885 Lossless decoder fix for a special transform order
+62dd9bb Update encoding heuristic w.r.t palette colors.
+6f4272b remove unused ApplyInverseTransform()
+93bf0fa Update ChangeLog (v0.2.0-rc1)
 5934fc5 update AUTHORS
 014a711 update NEWS
 43b0d61 add support for ARGB -> YUVA conversion for lossless decoder