From 06b7bc7decae8564310cff5325c9bf7c2233411f Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Thu, 4 Oct 2018 17:25:42 +0200
Subject: [PATCH 01/12] Fix compilation on windows and clang-cl+ninja.

Change-Id: I4e468519e1bcb99da5057f3b6646b077a1e0e7f1
(cherry picked from commit a376e7b96a255e93aa87e23e46c1c518496abe9d)
---
 cmake/cpu.cmake | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/cmake/cpu.cmake b/cmake/cpu.cmake
index 27ed4f2a..da9a42a4 100644
--- a/cmake/cpu.cmake
+++ b/cmake/cpu.cmake
@@ -28,18 +28,18 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
 endfunction()
 
 # those are included in the names of WEBP_USE_* in c++ code.
-set(WEBP_SIMD_FLAGS "SSE2;SSE41;MIPS32;MIPS_DSP_R2;NEON;MSA")
+set(WEBP_SIMD_FLAGS "SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
 set(WEBP_SIMD_FILE_EXTENSIONS
-    "_sse2.c;_sse41.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
+    "_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
 if(MSVC)
-  # MSVC does not have a SSE4 flag but AVX2 support implies SSE4 support.
-  set(SIMD_ENABLE_FLAGS "/arch:SSE2;/arch:AVX2;;;;")
+  # MSVC does not have a SSE4 flag but AVX support implies SSE4 support.
+  set(SIMD_ENABLE_FLAGS "/arch:AVX;/arch:SSE2;;;;")
   set(SIMD_DISABLE_FLAGS)
 else()
   set(SIMD_ENABLE_FLAGS
-      "-msse2;-msse4.1;-mips32;-mdspr2;-mfpu=neon;-mmsa")
+      "-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
   set(SIMD_DISABLE_FLAGS
-      "-mno-sse2;-mno-sse4.1;;-mno-dspr2;;-mno-msa")
+      "-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
 endif()
 
 set(WEBP_SIMD_FILES_TO_NOT_INCLUDE)
@@ -57,6 +57,7 @@ endif()
 
 list(LENGTH WEBP_SIMD_FLAGS WEBP_SIMD_FLAGS_LENGTH)
 math(EXPR WEBP_SIMD_FLAGS_RANGE "${WEBP_SIMD_FLAGS_LENGTH} - 1")
+unset(HIGHEST_SSE_FLAG)
 
 foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
   list(GET WEBP_SIMD_FLAGS ${I_SIMD} WEBP_SIMD_FLAG)
@@ -72,17 +73,28 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
     set(CMAKE_REQUIRED_FLAGS ${SIMD_COMPILE_FLAG})
     webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
   else()
-    set(SIMD_COMPILE_FLAG " ")
+    if(MSVC)
+      list(GET SIMD_ENABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
+    else()
+      set(SIMD_COMPILE_FLAG " ")
+    endif()
   endif()
   # Check which files we should include or not.
   list(GET WEBP_SIMD_FILE_EXTENSIONS ${I_SIMD} WEBP_SIMD_FILE_EXTENSION)
   file(GLOB SIMD_FILES "${CMAKE_CURRENT_LIST_DIR}/../"
             "src/dsp/*${WEBP_SIMD_FILE_EXTENSION}")
   if(WEBP_HAVE_${WEBP_SIMD_FLAG})
+    if(${I_SIMD} LESS 2 AND NOT HIGHEST_SSE_FLAG)
+      set(HIGHEST_SSE_FLAG ${SIMD_COMPILE_FLAG})
+    endif()
     # Memorize the file and flags.
     foreach(FILE ${SIMD_FILES})
       list(APPEND WEBP_SIMD_FILES_TO_INCLUDE ${FILE})
-      list(APPEND WEBP_SIMD_FLAGS_TO_INCLUDE ${SIMD_COMPILE_FLAG})
+      if(${I_SIMD} LESS 2)
+        list(APPEND WEBP_SIMD_FLAGS_TO_INCLUDE ${HIGHEST_SSE_FLAG})
+      else()
+        list(APPEND WEBP_SIMD_FLAGS_TO_INCLUDE ${SIMD_COMPILE_FLAG})
+      endif()
     endforeach()
   else()
     # Remove the file from the list.

From f2dfd92557a657f5c421c7d5dc62f5833e43f9e0 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Wed, 17 Oct 2018 16:55:30 +0200
Subject: [PATCH 02/12] Split HistogramAdd to only have the high level logic in
 C.

Change-Id: Ic9eaebf7128ca0215b49d2a13bde1f5b94a28061
(cherry picked from commit dea3e89983f299b3325898fa5b9474be258553b2)
---
 src/dsp/lossless.h            | 14 ++++---
 src/dsp/lossless_enc.c        | 57 ++++++++++++-------------
 src/dsp/lossless_enc_mips32.c | 79 ++++++++++-------------------------
 src/dsp/lossless_enc_sse2.c   | 44 ++++++-------------
 4 files changed, 72 insertions(+), 122 deletions(-)

diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
index b2bbdfc9..f709cc86 100644
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -163,7 +163,7 @@ extern VP8LCostCombinedFunc VP8LExtraCostCombined;
 extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
 
 typedef struct {        // small struct to hold counters
-  int counts[2];        // index: 0=zero steak, 1=non-zero streak
+  int counts[2];        // index: 0=zero streak, 1=non-zero streak
   int streaks[2][2];    // [zero/non-zero][streak<3 / streak>=3]
 } VP8LStreaks;
 
@@ -194,10 +194,14 @@ extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
 void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
                               VP8LBitEntropy* const entropy);
 
-typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
-                                     const VP8LHistogram* const b,
-                                     VP8LHistogram* const out);
-extern VP8LHistogramAddFunc VP8LHistogramAdd;
+typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
+                                  uint32_t* out, int size);
+extern VP8LAddVectorFunc VP8LAddVector;
+typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
+extern VP8LAddVectorEqFunc VP8LAddVectorEq;
+void VP8LHistogramAdd(const VP8LHistogram* const a,
+                      const VP8LHistogram* const b,
+                      VP8LHistogram* const out);
 
 // -----------------------------------------------------------------------------
 // PrefixEncode()
diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index d608326f..6ec9e46a 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -632,36 +632,34 @@ static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
 
 //------------------------------------------------------------------------------
 
-static void HistogramAdd_C(const VP8LHistogram* const a,
-                           const VP8LHistogram* const b,
-                           VP8LHistogram* const out) {
+static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
+                        int size) {
   int i;
+  for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
+}
+
+static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
+  int i;
+  for (i = 0; i < size; ++i) out[i] += a[i];
+}
+
+void VP8LHistogramAdd(const VP8LHistogram* const a,
+                      const VP8LHistogram* const b, VP8LHistogram* const out) {
   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   assert(a->palette_code_bits_ == b->palette_code_bits_);
   if (b != out) {
-    for (i = 0; i < literal_size; ++i) {
-      out->literal_[i] = a->literal_[i] + b->literal_[i];
-    }
-    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-      out->distance_[i] = a->distance_[i] + b->distance_[i];
-    }
-    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
-      out->red_[i] = a->red_[i] + b->red_[i];
-      out->blue_[i] = a->blue_[i] + b->blue_[i];
-      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
-    }
+    VP8LAddVector(a->literal_, b->literal_, out->literal_, literal_size);
+    VP8LAddVector(a->distance_, b->distance_, out->distance_,
+                  NUM_DISTANCE_CODES);
+    VP8LAddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+    VP8LAddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+    VP8LAddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
   } else {
-    for (i = 0; i < literal_size; ++i) {
-      out->literal_[i] += a->literal_[i];
-    }
-    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-      out->distance_[i] += a->distance_[i];
-    }
-    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
-      out->red_[i] += a->red_[i];
-      out->blue_[i] += a->blue_[i];
-      out->alpha_[i] += a->alpha_[i];
-    }
+    VP8LAddVectorEq(a->literal_, out->literal_, literal_size);
+    VP8LAddVectorEq(a->distance_, out->distance_, NUM_DISTANCE_CODES);
+    VP8LAddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
+    VP8LAddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
+    VP8LAddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
   }
 }
 
@@ -848,7 +846,8 @@ VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
 VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
 VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
 
-VP8LHistogramAddFunc VP8LHistogramAdd;
+VP8LAddVectorFunc VP8LAddVector;
+VP8LAddVectorEqFunc VP8LAddVectorEq;
 
 VP8LVectorMismatchFunc VP8LVectorMismatch;
 VP8LBundleColorMapFunc VP8LBundleColorMap;
@@ -885,7 +884,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
   VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
   VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
 
-  VP8LHistogramAdd = HistogramAdd_C;
+  VP8LAddVector = AddVector_C;
+  VP8LAddVectorEq = AddVectorEq_C;
 
   VP8LVectorMismatch = VectorMismatch_C;
   VP8LBundleColorMap = VP8LBundleColorMap_C;
@@ -971,7 +971,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
   assert(VP8LCombinedShannonEntropy != NULL);
   assert(VP8LGetEntropyUnrefined != NULL);
   assert(VP8LGetCombinedEntropyUnrefined != NULL);
-  assert(VP8LHistogramAdd != NULL);
+  assert(VP8LAddVector != NULL);
+  assert(VP8LAddVectorEq != NULL);
   assert(VP8LVectorMismatch != NULL);
   assert(VP8LBundleColorMap != NULL);
   assert(VP8LPredictorsSub[0] != NULL);
diff --git a/src/dsp/lossless_enc_mips32.c b/src/dsp/lossless_enc_mips32.c
index e7b58f4e..0412a093 100644
--- a/src/dsp/lossless_enc_mips32.c
+++ b/src/dsp/lossless_enc_mips32.c
@@ -344,65 +344,29 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
     ASM_END_COMMON_0                                    \
     ASM_END_COMMON_1
 
-#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE)  do {   \
-  const uint32_t* pa = (const uint32_t*)(A);            \
-  const uint32_t* pb = (const uint32_t*)(B);            \
-  uint32_t* pout = (uint32_t*)(OUT);                    \
-  const uint32_t* const LoopEnd = pa + (SIZE);          \
-  assert((SIZE) % 4 == 0);                              \
-  ASM_START                                             \
-  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)              \
-  ASM_END_0                                             \
-  if ((EXTRA_SIZE) > 0) {                               \
-    const int last = (EXTRA_SIZE);                      \
-    int i;                                              \
-    for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
-  }                                                     \
-} while (0)
-
-#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE)  do {   \
-  const uint32_t* pa = (const uint32_t*)(A);            \
-  uint32_t* pout = (uint32_t*)(OUT);                    \
-  const uint32_t* const LoopEnd = pa + (SIZE);          \
-  assert((SIZE) % 4 == 0);                              \
-  ASM_START                                             \
-  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)            \
-  ASM_END_1                                             \
-  if ((EXTRA_SIZE) > 0) {                               \
-    const int last = (EXTRA_SIZE);                      \
-    int i;                                              \
-    for (i = 0; i < last; ++i) pout[i] += pa[i];        \
-  }                                                     \
-} while (0)
-
-static void HistogramAdd_MIPS32(const VP8LHistogram* const a,
-                                const VP8LHistogram* const b,
-                                VP8LHistogram* const out) {
+static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
+                             uint32_t* pout, int size) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
-                             - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
-  assert(a->palette_code_bits_ == b->palette_code_bits_);
-
-  if (b != out) {
-    ADD_VECTOR(a->literal_, b->literal_, out->literal_,
-               NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
-    ADD_VECTOR(a->distance_, b->distance_, out->distance_,
-               NUM_DISTANCE_CODES, 0);
-    ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
-    ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
-    ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
-  } else {
-    ADD_VECTOR_EQ(a->literal_, out->literal_,
-                  NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
-    ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
-    ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
-    ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
-    ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
-  }
+  const uint32_t end = ((size) / 4) * 4;
+  const uint32_t* const LoopEnd = pa + end;
+  int i;
+  ASM_START
+  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
+  ASM_END_0
+  for (i = 0; i < size - (int)end; ++i) pout[i] = pa[i] + pb[i];
+}
+
+static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  const uint32_t end = ((size) / 4) * 4;
+  const uint32_t* const LoopEnd = pa + end;
+  int i;
+  ASM_START
+  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
+  ASM_END_1
+  for (i = 0; i < size - (int)end; ++i) pout[i] += pa[i];
 }
 
-#undef ADD_VECTOR_EQ
-#undef ADD_VECTOR
 #undef ASM_END_1
 #undef ASM_END_0
 #undef ASM_END_COMMON_1
@@ -422,7 +386,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
   VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
   VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
   VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
-  VP8LHistogramAdd = HistogramAdd_MIPS32;
+  VP8LAddVector = AddVector_MIPS32;
+  VP8LAddVectorEq = AddVectorEq_MIPS32;
 }
 
 #else  // !WEBP_USE_MIPS32
diff --git a/src/dsp/lossless_enc_sse2.c b/src/dsp/lossless_enc_sse2.c
index f84a9909..36478c49 100644
--- a/src/dsp/lossless_enc_sse2.c
+++ b/src/dsp/lossless_enc_sse2.c
@@ -170,12 +170,13 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
 
 //------------------------------------------------------------------------------
 
+// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
+// that's ok since the histogram values are less than 1<<28 (max picture size).
 #define LINE_SIZE 16    // 8 or 16
 static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
                            int size) {
   int i;
-  assert(size % LINE_SIZE == 0);
-  for (i = 0; i < size; i += LINE_SIZE) {
+  for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
     const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i +  0]);
     const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i +  4]);
 #if (LINE_SIZE == 16)
@@ -195,12 +196,14 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
     _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
 #endif
   }
+  for (; i < size; ++i) {
+    out[i] = a[i] + b[i];
+  }
 }
 
 static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
   int i;
-  assert(size % LINE_SIZE == 0);
-  for (i = 0; i < size; i += LINE_SIZE) {
+  for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
     const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i +  0]);
     const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i +  4]);
 #if (LINE_SIZE == 16)
@@ -220,36 +223,12 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
     _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
 #endif
   }
+  for (; i < size; ++i) {
+    out[i] += a[i];
+  }
 }
 #undef LINE_SIZE
 
-// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
-// that's ok since the histogram values are less than 1<<28 (max picture size).
-static void HistogramAdd_SSE2(const VP8LHistogram* const a,
-                              const VP8LHistogram* const b,
-                              VP8LHistogram* const out) {
-  int i;
-  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
-  assert(a->palette_code_bits_ == b->palette_code_bits_);
-  if (b != out) {
-    AddVector_SSE2(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
-    AddVector_SSE2(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
-    AddVector_SSE2(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
-    AddVector_SSE2(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
-  } else {
-    AddVectorEq_SSE2(a->literal_, out->literal_, NUM_LITERAL_CODES);
-    AddVectorEq_SSE2(a->red_, out->red_, NUM_LITERAL_CODES);
-    AddVectorEq_SSE2(a->blue_, out->blue_, NUM_LITERAL_CODES);
-    AddVectorEq_SSE2(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
-  }
-  for (i = NUM_LITERAL_CODES; i < literal_size; ++i) {
-    out->literal_[i] = a->literal_[i] + b->literal_[i];
-  }
-  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-    out->distance_[i] = a->distance_[i] + b->distance_[i];
-  }
-}
-
 //------------------------------------------------------------------------------
 // Entropy
 
@@ -675,7 +654,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
   VP8LTransformColor = TransformColor_SSE2;
   VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE2;
   VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
-  VP8LHistogramAdd = HistogramAdd_SSE2;
+  VP8LAddVector = AddVector_SSE2;
+  VP8LAddVectorEq = AddVectorEq_SSE2;
   VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
   VP8LVectorMismatch = VectorMismatch_SSE2;
   VP8LBundleColorMap = BundleColorMap_SSE2;

From f0abab921737e71d04c314748d784774caafbc02 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Fri, 28 Sep 2018 15:32:25 +0200
Subject: [PATCH 03/12] Speedups for empty histograms.

When histograms are empty, it is easy to add them.
They should also not be considered when merging histograms
(it is a waste of CPU).
This does not change the compression performance,
just the speed.

Change-Id: I42c721ca0f9c5ea067e73b792aa3db6d5e71d01f
(cherry picked from commit decf6f6b873206797d3a39b85d9e03a5dbfc82e7)
---
 src/dsp/lossless_enc.c  |  53 +++++++++++++++-----
 src/enc/histogram_enc.c | 106 +++++++++++++++++++++++++++-------------
 src/enc/histogram_enc.h |   3 +-
 3 files changed, 116 insertions(+), 46 deletions(-)

diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index 6ec9e46a..1408fbf5 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -643,25 +643,56 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
   for (i = 0; i < size; ++i) out[i] += a[i];
 }
 
+#define ADD(X, ARG, LEN) do {                                                  \
+  if (a->is_used_[X]) {                                                        \
+    if (b->is_used_[X]) {                                                      \
+      VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN));                          \
+    } else {                                                                   \
+      memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
+    }                                                                          \
+  } else if (b->is_used_[X]) {                                                 \
+    memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0]));             \
+  } else {                                                                     \
+    memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0]));                      \
+  }                                                                            \
+} while (0)
+
+#define ADD_EQ(X, ARG, LEN) do {                                               \
+  if (a->is_used_[X]) {                                                        \
+    if (out->is_used_[X]) {                                                    \
+      VP8LAddVectorEq(a->ARG, out->ARG, (LEN));                                \
+    } else {                                                                   \
+      memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
+    }                                                                          \
+  }                                                                            \
+} while (0)
+
 void VP8LHistogramAdd(const VP8LHistogram* const a,
                       const VP8LHistogram* const b, VP8LHistogram* const out) {
+  int i;
   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   assert(a->palette_code_bits_ == b->palette_code_bits_);
+
   if (b != out) {
-    VP8LAddVector(a->literal_, b->literal_, out->literal_, literal_size);
-    VP8LAddVector(a->distance_, b->distance_, out->distance_,
-                  NUM_DISTANCE_CODES);
-    VP8LAddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
-    VP8LAddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
-    VP8LAddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    ADD(0, literal_, literal_size);
+    ADD(1, red_, NUM_LITERAL_CODES);
+    ADD(2, blue_, NUM_LITERAL_CODES);
+    ADD(3, alpha_, NUM_LITERAL_CODES);
+    ADD(4, distance_, NUM_DISTANCE_CODES);
+    for (i = 0; i < 5; ++i) {
+      out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
+    }
   } else {
-    VP8LAddVectorEq(a->literal_, out->literal_, literal_size);
-    VP8LAddVectorEq(a->distance_, out->distance_, NUM_DISTANCE_CODES);
-    VP8LAddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
-    VP8LAddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
-    VP8LAddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    ADD_EQ(0, literal_, literal_size);
+    ADD_EQ(1, red_, NUM_LITERAL_CODES);
+    ADD_EQ(2, blue_, NUM_LITERAL_CODES);
+    ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
+    ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
+    for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
   }
 }
+#undef ADD
+#undef ADD_EQ
 
 //------------------------------------------------------------------------------
 // Image transforms.
diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c
index 9fdbc627..c988b4d4 100644
--- a/src/enc/histogram_enc.c
+++ b/src/enc/histogram_enc.c
@@ -51,10 +51,12 @@ static void HistogramCopy(const VP8LHistogram* const src,
                           VP8LHistogram* const dst) {
   uint32_t* const dst_literal = dst->literal_;
   const int dst_cache_bits = dst->palette_code_bits_;
+  const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
   const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
   assert(src->palette_code_bits_ == dst_cache_bits);
   memcpy(dst, src, histo_size);
   dst->literal_ = dst_literal;
+  memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
 }
 
 int VP8LGetHistogramSize(int cache_bits) {
@@ -237,7 +239,8 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
 // Get the symbol entropy for the distribution 'population'.
 // Set 'trivial_sym', if there's only one symbol present in the distribution.
 static double PopulationCost(const uint32_t* const population, int length,
-                             uint32_t* const trivial_sym) {
+                             uint32_t* const trivial_sym,
+                             uint8_t* const is_used) {
   VP8LBitEntropy bit_entropy;
   VP8LStreaks stats;
   VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@@ -245,6 +248,8 @@ static double PopulationCost(const uint32_t* const population, int length,
     *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
                                                : VP8L_NON_TRIVIAL_SYM;
   }
+  // The histogram is used if there is at least one non-zero streak.
+  *is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);
 
   return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
 }
@@ -253,7 +258,9 @@ static double PopulationCost(const uint32_t* const population, int length,
 // non-zero: both the zero-th one, or both the last one.
 static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
                                              const uint32_t* const Y,
-                                             int length, int trivial_at_end) {
+                                             int length, int is_X_used,
+                                             int is_Y_used,
+                                             int trivial_at_end) {
   VP8LStreaks stats;
   if (trivial_at_end) {
     // This configuration is due to palettization that transforms an indexed
@@ -262,28 +269,43 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
     // Only FinalHuffmanCost needs to be evaluated.
     memset(&stats, 0, sizeof(stats));
     // Deal with the non-zero value at index 0 or length-1.
-    stats.streaks[1][0] += 1;
+    stats.streaks[1][0] = 1;
     // Deal with the following/previous zero streak.
-    stats.counts[0] += 1;
-    stats.streaks[0][1] += length - 1;
+    stats.counts[0] = 1;
+    stats.streaks[0][1] = length - 1;
     return FinalHuffmanCost(&stats);
   } else {
     VP8LBitEntropy bit_entropy;
-    VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
+    if (is_X_used) {
+      if (is_Y_used) {
+        VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
+      } else {
+        VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
+      }
+    } else {
+      if (is_Y_used) {
+        VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
+      } else {
+        memset(&stats, 0, sizeof(stats));
+        stats.counts[0] = 1;
+        stats.streaks[0][length > 3] = length;
+        VP8LBitEntropyInit(&bit_entropy);
+      }
+    }
 
     return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
   }
 }
 
 // Estimates the Entropy + Huffman + other block overhead size cost.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
   return
-      PopulationCost(
-          p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
-      + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
-      + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
-      + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
-      + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
+      PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
+                     NULL, &p->is_used_[0])
+      + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
+      + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
+      + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
+      + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
       + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
       + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
 }
@@ -299,7 +321,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
   int trivial_at_end = 0;
   assert(a->palette_code_bits_ == b->palette_code_bits_);
   *cost += GetCombinedEntropy(a->literal_, b->literal_,
-                              VP8LHistogramNumCodes(palette_code_bits), 0);
+                              VP8LHistogramNumCodes(palette_code_bits),
+                              a->is_used_[0], b->is_used_[0], 0);
   *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
                                  b->literal_ + NUM_LITERAL_CODES,
                                  NUM_LENGTH_CODES);
@@ -319,19 +342,23 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
   }
 
   *cost +=
-      GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);
+      GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
+                         b->is_used_[1], trivial_at_end);
   if (*cost > cost_threshold) return 0;
 
   *cost +=
-      GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);
-  if (*cost > cost_threshold) return 0;
-
-  *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
-                              trivial_at_end);
+      GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
+                         b->is_used_[2], trivial_at_end);
   if (*cost > cost_threshold) return 0;
 
   *cost +=
-      GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0);
+      GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
+                         a->is_used_[3], b->is_used_[3], trivial_at_end);
+  if (*cost > cost_threshold) return 0;
+
+  *cost +=
+      GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
+                         a->is_used_[4], b->is_used_[4], 0);
   *cost +=
       VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
   if (*cost > cost_threshold) return 0;
@@ -419,16 +446,19 @@ static void UpdateDominantCostRange(
 static void UpdateHistogramCost(VP8LHistogram* const h) {
   uint32_t alpha_sym, red_sym, blue_sym;
   const double alpha_cost =
-      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
+      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
+                     &h->is_used_[3]);
   const double distance_cost =
-      PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
+      PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
       VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
   const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
-  h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
-                     VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
-                                   NUM_LENGTH_CODES);
-  h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
-  h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
+  h->literal_cost_ =
+      PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
+          VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+  h->red_cost_ =
+      PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
+  h->blue_cost_ =
+      PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym, &h->is_used_[2]);
   h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
                  alpha_cost + distance_cost;
   if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
@@ -493,11 +523,19 @@ static void HistogramCopyAndAnalyze(
   const int histo_size = orig_histo->size;
   VP8LHistogram** const orig_histograms = orig_histo->histograms;
   VP8LHistogram** const histograms = image_histo->histograms;
+  image_histo->size = 0;
   for (i = 0; i < histo_size; ++i) {
     VP8LHistogram* const histo = orig_histograms[i];
     UpdateHistogramCost(histo);
+
+    // Skip the histogram if it is completely empty, which can happen for tiles
+    // with no information (when they are skipped because of LZ77).
+    if (!histo->is_used_[0] && !histo->is_used_[1] && !histo->is_used_[2]
+        && !histo->is_used_[3] && !histo->is_used_[4]) {
+      continue;
+    }
     // Copy histograms from orig_histo[] to image_histo[].
-    HistogramCopy(histo, histograms[i]);
+    HistogramCopy(histo, histograms[image_histo->size++]);
   }
 }
 
@@ -987,8 +1025,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
   // histograms of small sizes (as bin_map will be very sparse) and
   // maximum quality q==100 (to preserve the compression gains at that level).
   const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
-  const int entropy_combine =
-      (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
+  int entropy_combine;
 
   if (orig_histo == NULL) goto Error;
 
@@ -996,15 +1033,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
   HistogramBuild(xsize, histo_bits, refs, orig_histo);
   // Copies the histograms and computes its bit_cost.
   HistogramCopyAndAnalyze(orig_histo, image_histo);
-
+  entropy_combine =
+      (image_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
   if (entropy_combine) {
-    const int bin_map_size = orig_histo->size;
+    const int bin_map_size = image_histo->size;
     // Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
     uint16_t* const bin_map = histogram_symbols;
     const double combine_cost_factor =
         GetCombineCostFactor(image_histo_raw_size, quality);
 
-    HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
+    HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
     // Collapse histograms with similar entropy.
     HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
                                entropy_combine_num_bins, combine_cost_factor,
diff --git a/src/enc/histogram_enc.h b/src/enc/histogram_enc.h
index e8c4c83f..67d82533 100644
--- a/src/enc/histogram_enc.h
+++ b/src/enc/histogram_enc.h
@@ -44,6 +44,7 @@ typedef struct {
   double literal_cost_;      // Cached values of dominant entropy costs:
   double red_cost_;          // literal, red & blue.
   double blue_cost_;
+  uint8_t is_used_[5];       // 5 for literal, red, blue, alpha, distance
 } VP8LHistogram;
 
 // Collection of histograms with fixed capacity, allocated as one
@@ -113,7 +114,7 @@ double VP8LBitsEntropy(const uint32_t* const array, int n);
 
 // Estimate how many bits the combined entropy of literals and distance
 // approximately maps to.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+double VP8LHistogramEstimateBits(VP8LHistogram* const p);
 
 #ifdef __cplusplus
 }

From 301a2ddae59ddb9ef247852301478962e0d82f04 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 25 Oct 2018 23:05:08 -0700
Subject: [PATCH 04/12] img2webp: add help note about arguments from a file

this was added in:
94a8377b extract the command-line parsing helpers to example_util

and matches the help in webpmux

https://groups.google.com/a/webmproject.org/d/msg/webp-discuss/DJs-w_-Id6o/svFXs2CqBgAJ

BUG=webp:101

Change-Id: I2944d1fb1ed3030c356960be2a6c8de15a79311f
(cherry picked from commit b6284d8247b0bc2c27efa37cdbf3c6a46a0f1791)
---
 examples/img2webp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/img2webp.c b/examples/img2webp.c
index 2f750c59..c440b36a 100644
--- a/examples/img2webp.c
+++ b/examples/img2webp.c
@@ -61,6 +61,10 @@ static void Help(void) {
   printf("\n");
   printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
          "                  -d 80 in2.tiff -o out.webp\n");
+  printf("\nNote: if a single file name is passed as the argument, the "
+         "arguments will be\n");
+  printf("tokenized from this file. The file name must not start with "
+         "the character '-'.\n");
 }
 
 //------------------------------------------------------------------------------

From 0c57031629c73ff168996703fbe4ac7244e8cd00 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Sat, 27 Oct 2018 12:40:29 -0700
Subject: [PATCH 05/12] update AUTHORS

Change-Id: Ie7731464088d985a7398401c8ef45bd26c536fe5
---
 AUTHORS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index 83c7b9c5..06df9987 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,4 +1,5 @@
 Contributors:
+- Alan Browning (browning at google dot com)
 - Charles Munger (clm at google dot com)
 - Christian Duvivier (cduvivier at google dot com)
 - Djordje Pesut (djordje dot pesut at imgtec dot com)
@@ -9,6 +10,7 @@ Contributors:
 - Johann (johann dot koenig at duck dot com)
 - Jovan Zelincevic (jovan dot zelincevic at imgtec dot com)
 - Jyrki Alakuijala (jyrki at google dot com)
+- Konstantin Ivlev (tomskside at gmail dot com)
 - Lode Vandevenne (lode at google dot com)
 - Lou Quillio (louquillio at google dot com)
 - Mans Rullgard (mans at mansr dot com)
@@ -37,3 +39,4 @@ Contributors:
 - Vincent Rabaud (vrabaud at google dot com)
 - Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
 - Yang Zhang (yang dot zhang at arm dot com)
+- Yannis Guyon (yguyon at google dot com)

From d61385db3540b391fdb8f887d25b33232263387e Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Tue, 30 Oct 2018 20:33:20 +0100
Subject: [PATCH 06/12] Speed-up: Make sure we only initialize histograms when
 needed.

Also, histograms in a HistogramSet can be initialized all
at once.

Change-Id: Ibbfa6034dce58dca8bb9113487e2ae507222ce7d
(cherry picked from commit 6752904b2f57aa19e325f773e81ac70b4f302f26)
---
 src/enc/backward_references_cost_enc.c |  2 +-
 src/enc/backward_references_enc.c      |  1 +
 src/enc/histogram_enc.c                | 72 +++++++++++++++++++++-----
 src/enc/histogram_enc.h                |  7 ++-
 src/enc/vp8l_enc.c                     |  1 +
 5 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/src/enc/backward_references_cost_enc.c b/src/enc/backward_references_cost_enc.c
index 7175496c..516abd73 100644
--- a/src/enc/backward_references_cost_enc.c
+++ b/src/enc/backward_references_cost_enc.c
@@ -67,7 +67,7 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
 
   // The following code is similar to VP8LHistogramCreate but converts the
   // distance to plane code.
-  VP8LHistogramInit(histo, cache_bits);
+  VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 1);
   while (VP8LRefsCursorOk(&c)) {
     VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, VP8LDistanceToPlaneCode,
                                     xsize);
diff --git a/src/enc/backward_references_enc.c b/src/enc/backward_references_enc.c
index 39230188..3ab7b0ac 100644
--- a/src/enc/backward_references_enc.c
+++ b/src/enc/backward_references_enc.c
@@ -715,6 +715,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
   for (i = 0; i <= cache_bits_max; ++i) {
     histos[i] = VP8LAllocateHistogram(i);
     if (histos[i] == NULL) goto Error;
+    VP8LHistogramInit(histos[i], i, /*init_arrays=*/ 1);
     if (i == 0) continue;
     cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
     if (!cc_init[i]) goto Error;
diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c
index c988b4d4..28a3e6e6 100644
--- a/src/enc/histogram_enc.c
+++ b/src/enc/histogram_enc.c
@@ -93,9 +93,19 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
   VP8LHistogramStoreRefs(refs, p);
 }
 
-void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
+                       int init_arrays) {
   p->palette_code_bits_ = palette_code_bits;
-  HistogramClear(p);
+  if (init_arrays) {
+    HistogramClear(p);
+  } else {
+    p->trivial_symbol_ = 0;
+    p->bit_cost_ = 0.;
+    p->literal_cost_ = 0.;
+    p->red_cost_ = 0.;
+    p->blue_cost_ = 0.;
+    memset(p->is_used_, 0, sizeof(p->is_used_));
+  }
 }
 
 VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
@@ -106,37 +116,70 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
   histo = (VP8LHistogram*)memory;
   // literal_ won't necessary be aligned.
   histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
-  VP8LHistogramInit(histo, cache_bits);
+  VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 0);
   return histo;
 }
 
+// Resets the pointers of the histograms to point to the bit buffer in the set.
+static void HistogramSetResetPointers(VP8LHistogramSet* const set,
+                                      int cache_bits) {
+  int i;
+  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  uint8_t* memory = (uint8_t*) (set->histograms);
+  memory += set->max_size * sizeof(*set->histograms);
+  for (i = 0; i < set->max_size; ++i) {
+    memory = (uint8_t*) WEBP_ALIGN(memory);
+    set->histograms[i] = (VP8LHistogram*) memory;
+    // literal_ won't necessarily be aligned.
+    set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
+    memory += histo_size;
+  }
+}
+
+// Returns the total size of the VP8LHistogramSet.
+static size_t HistogramSetTotalSize(int size, int cache_bits) {
+  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  return (sizeof(VP8LHistogramSet) + size * (sizeof(VP8LHistogram*) +
+          histo_size + WEBP_ALIGN_CST));
+}
+
 VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
   int i;
   VP8LHistogramSet* set;
-  const int histo_size = VP8LGetHistogramSize(cache_bits);
-  const size_t total_size =
-      sizeof(*set) + size * (sizeof(*set->histograms) +
-      histo_size + WEBP_ALIGN_CST);
+  const size_t total_size = HistogramSetTotalSize(size, cache_bits);
   uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
   if (memory == NULL) return NULL;
 
   set = (VP8LHistogramSet*)memory;
   memory += sizeof(*set);
   set->histograms = (VP8LHistogram**)memory;
-  memory += size * sizeof(*set->histograms);
   set->max_size = size;
   set->size = size;
+  HistogramSetResetPointers(set, cache_bits);
   for (i = 0; i < size; ++i) {
-    memory = (uint8_t*)WEBP_ALIGN(memory);
-    set->histograms[i] = (VP8LHistogram*)memory;
-    // literal_ won't necessary be aligned.
-    set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
-    VP8LHistogramInit(set->histograms[i], cache_bits);
-    memory += histo_size;
+    VP8LHistogramInit(set->histograms[i], cache_bits, /*init_arrays=*/ 0);
   }
   return set;
 }
 
+void VP8LHistogramSetClear(VP8LHistogramSet* const set) {
+  int i;
+  const int cache_bits = set->histograms[0]->palette_code_bits_;
+  const int size = set->size;
+  const size_t total_size = HistogramSetTotalSize(size, cache_bits);
+  uint8_t* memory = (uint8_t*)set;
+
+  memset(memory, 0, total_size);
+  memory += sizeof(*set);
+  set->histograms = (VP8LHistogram**)memory;
+  set->max_size = size;
+  set->size = size;
+  HistogramSetResetPointers(set, cache_bits);
+  for (i = 0; i < size; ++i) {
+    set->histograms[i]->palette_code_bits_ = cache_bits;
+  }
+}
+
 // -----------------------------------------------------------------------------
 
 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
@@ -503,6 +546,7 @@ static void HistogramBuild(
   VP8LHistogram** const histograms = image_histo->histograms;
   VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
   assert(histo_bits > 0);
+  VP8LHistogramSetClear(image_histo);
   while (VP8LRefsCursorOk(&c)) {
     const PixOrCopy* const v = c.cur_pos;
     const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
diff --git a/src/enc/histogram_enc.h b/src/enc/histogram_enc.h
index 67d82533..54c2d217 100644
--- a/src/enc/histogram_enc.h
+++ b/src/enc/histogram_enc.h
@@ -68,7 +68,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
 int VP8LGetHistogramSize(int palette_code_bits);
 
 // Set the palette_code_bits and reset the stats.
-void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
+// If init_arrays is true, the arrays are also filled with 0's.
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
+                       int init_arrays);
 
 // Collect all the references into a histogram (without reset)
 void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
@@ -84,6 +86,9 @@ void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
 // using 'cache_bits'. Return NULL in case of memory error.
 VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
 
+// Set the histograms in set to 0.
+void VP8LHistogramSetClear(VP8LHistogramSet* const set);
+
 // Allocate and initialize histogram object with specified 'cache_bits'.
 // Returns NULL in case of memory error.
 // Special case of VP8LAllocateHistogramSet, with size equals 1.
diff --git a/src/enc/vp8l_enc.c b/src/enc/vp8l_enc.c
index b7411bf4..2713edcd 100644
--- a/src/enc/vp8l_enc.c
+++ b/src/enc/vp8l_enc.c
@@ -809,6 +809,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
+  VP8LHistogramSetClear(histogram_image);
 
   // Build histogram image and symbols from backward references.
   VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);

From f5a5918d13279bdcc3de52b0242db6bec79b460b Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 2 Nov 2018 20:36:14 -0700
Subject: [PATCH 07/12] bump version to 1.0.1

libwebp{,decoder} - 1.0.1
libwebp libtool - 7.3.0
libwebpdecoder libtool - 3.3.0

mux - 1.0.1
libtool - 3.3.0

demux - 1.0.1
libtool - 2.5.0

Change-Id: I4310caed27d1e53cc8c1b534571e3d653ad434c8
---
 README                    | 5 ++++-
 README.mux                | 2 +-
 configure.ac              | 2 +-
 extras/extras.c           | 2 +-
 src/Makefile.am           | 4 ++--
 src/dec/vp8i_dec.h        | 2 +-
 src/demux/Makefile.am     | 2 +-
 src/demux/demux.c         | 2 +-
 src/demux/libwebpdemux.rc | 8 ++++----
 src/enc/vp8i_enc.h        | 2 +-
 src/libwebp.rc            | 8 ++++----
 src/libwebpdecoder.rc     | 8 ++++----
 src/mux/Makefile.am       | 2 +-
 src/mux/libwebpmux.rc     | 8 ++++----
 src/mux/muxi.h            | 2 +-
 15 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/README b/README
index 103b3ae8..4fa15b37 100644
--- a/README
+++ b/README
@@ -4,7 +4,7 @@
           \__\__/\____/\_____/__/ ____  ___
                 / _/ /    \    \ /  _ \/ _/
                /  \_/   / /   \ \   __/  \__
-               \____/____/\_____/_____/____/v1.0.0
+               \____/____/\_____/_____/____/v1.0.1
 
 Description:
 ============
@@ -472,6 +472,9 @@ Per-frame options (only used for subsequent images input):
 example: img2webp -loop 2 in0.png -lossy in1.jpg
                   -d 80 in2.tiff -o out.webp
 
+Note: if a single file name is passed as the argument, the arguments will be
+tokenized from this file. The file name must not start with the character '-'.
+
 Animated GIF conversion:
 ========================
 Animated GIF files can be converted to WebP files with animation using the
diff --git a/README.mux b/README.mux
index bd4f92fa..ef705af3 100644
--- a/README.mux
+++ b/README.mux
@@ -1,7 +1,7 @@
 ﻿          __   __  ____  ____  ____  __ __  _     __ __
          /  \\/  \/  _ \/  _ \/  _ \/  \  \/ \___/_ / _\
          \       /   __/  _  \   __/      /  /  (_/  /__
-          \__\__/\_____/_____/__/  \__//_/\_____/__/___/v1.0.0
+          \__\__/\_____/_____/__/  \__//_/\_____/__/___/v1.0.1
 
 
 Description:
diff --git a/configure.ac b/configure.ac
index c85047f6..055f9316 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([libwebp], [1.0.0],
+AC_INIT([libwebp], [1.0.1],
         [https://bugs.chromium.org/p/webp],,
         [http://developers.google.com/speed/webp])
 AC_CANONICAL_HOST
diff --git a/extras/extras.c b/extras/extras.c
index 2feb595c..3dc29fe7 100644
--- a/extras/extras.c
+++ b/extras/extras.c
@@ -18,7 +18,7 @@
 
 #define XTRA_MAJ_VERSION 1
 #define XTRA_MIN_VERSION 0
-#define XTRA_REV_VERSION 0
+#define XTRA_REV_VERSION 1
 
 //------------------------------------------------------------------------------
 
diff --git a/src/Makefile.am b/src/Makefile.am
index dfa74d35..49509583 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -36,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
 # other than the ones listed on the command line, i.e., after linking, it will
 # not have unresolved symbols. Some platforms (Windows among them) require all
 # symbols in shared libraries to be resolved at library creation.
-libwebp_la_LDFLAGS = -no-undefined -version-info 7:2:0
+libwebp_la_LDFLAGS = -no-undefined -version-info 7:3:0
 libwebpincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebp.pc
 
@@ -48,7 +48,7 @@ if BUILD_LIBWEBPDECODER
   libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
   libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
 
-  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:2:0
+  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:3:0
   pkgconfig_DATA += libwebpdecoder.pc
 endif
 
diff --git a/src/dec/vp8i_dec.h b/src/dec/vp8i_dec.h
index f5fa4bee..e5e89df5 100644
--- a/src/dec/vp8i_dec.h
+++ b/src/dec/vp8i_dec.h
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 1
 #define DEC_MIN_VERSION 0
-#define DEC_REV_VERSION 0
+#define DEC_REV_VERSION 1
 
 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
diff --git a/src/demux/Makefile.am b/src/demux/Makefile.am
index 7e80bdc2..39f9f1a2 100644
--- a/src/demux/Makefile.am
+++ b/src/demux/Makefile.am
@@ -13,6 +13,6 @@ noinst_HEADERS =
 noinst_HEADERS += ../webp/format_constants.h
 
 libwebpdemux_la_LIBADD = ../libwebp.la
-libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:4:0
+libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:5:0
 libwebpdemuxincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebpdemux.pc
diff --git a/src/demux/demux.c b/src/demux/demux.c
index 684215e3..a69c65b7 100644
--- a/src/demux/demux.c
+++ b/src/demux/demux.c
@@ -25,7 +25,7 @@
 
 #define DMUX_MAJ_VERSION 1
 #define DMUX_MIN_VERSION 0
-#define DMUX_REV_VERSION 0
+#define DMUX_REV_VERSION 1
 
 typedef struct {
   size_t start_;        // start location of the data
diff --git a/src/demux/libwebpdemux.rc b/src/demux/libwebpdemux.rc
index 544a8b2f..0ec17e3e 100644
--- a/src/demux/libwebpdemux.rc
+++ b/src/demux/libwebpdemux.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,0,0
- PRODUCTVERSION 1,0,0,0
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Google, Inc."
             VALUE "FileDescription", "libwebpdemux DLL"
-            VALUE "FileVersion", "1.0.0"
+            VALUE "FileVersion", "1.0.1"
             VALUE "InternalName", "libwebpdemux.dll"
             VALUE "LegalCopyright", "Copyright (C) 2018"
             VALUE "OriginalFilename", "libwebpdemux.dll"
             VALUE "ProductName", "WebP Image Demuxer"
-            VALUE "ProductVersion", "1.0.0"
+            VALUE "ProductVersion", "1.0.1"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/src/enc/vp8i_enc.h b/src/enc/vp8i_enc.h
index 8ab9a26a..92439feb 100644
--- a/src/enc/vp8i_enc.h
+++ b/src/enc/vp8i_enc.h
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 1
 #define ENC_MIN_VERSION 0
-#define ENC_REV_VERSION 0
+#define ENC_REV_VERSION 1
 
 enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
        MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
diff --git a/src/libwebp.rc b/src/libwebp.rc
index d554124f..282f382d 100644
--- a/src/libwebp.rc
+++ b/src/libwebp.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,0,0
- PRODUCTVERSION 1,0,0,0
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Google, Inc."
             VALUE "FileDescription", "libwebp DLL"
-            VALUE "FileVersion", "1.0.0"
+            VALUE "FileVersion", "1.0.1"
             VALUE "InternalName", "libwebp.dll"
             VALUE "LegalCopyright", "Copyright (C) 2018"
             VALUE "OriginalFilename", "libwebp.dll"
             VALUE "ProductName", "WebP Image Codec"
-            VALUE "ProductVersion", "1.0.0"
+            VALUE "ProductVersion", "1.0.1"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/src/libwebpdecoder.rc b/src/libwebpdecoder.rc
index 8a2def48..87c0ff5c 100644
--- a/src/libwebpdecoder.rc
+++ b/src/libwebpdecoder.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,0,0
- PRODUCTVERSION 1,0,0,0
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Google, Inc."
             VALUE "FileDescription", "libwebpdecoder DLL"
-            VALUE "FileVersion", "1.0.0"
+            VALUE "FileVersion", "1.0.1"
             VALUE "InternalName", "libwebpdecoder.dll"
             VALUE "LegalCopyright", "Copyright (C) 2018"
             VALUE "OriginalFilename", "libwebpdecoder.dll"
             VALUE "ProductName", "WebP Image Decoder"
-            VALUE "ProductVersion", "1.0.0"
+            VALUE "ProductVersion", "1.0.1"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/src/mux/Makefile.am b/src/mux/Makefile.am
index 447bcce4..e65a92b2 100644
--- a/src/mux/Makefile.am
+++ b/src/mux/Makefile.am
@@ -17,6 +17,6 @@ noinst_HEADERS =
 noinst_HEADERS += ../webp/format_constants.h
 
 libwebpmux_la_LIBADD = ../libwebp.la
-libwebpmux_la_LDFLAGS = -no-undefined -version-info 3:2:0 -lm
+libwebpmux_la_LDFLAGS = -no-undefined -version-info 3:3:0 -lm
 libwebpmuxincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebpmux.pc
diff --git a/src/mux/libwebpmux.rc b/src/mux/libwebpmux.rc
index 8c7d5f67..a3746c7a 100644
--- a/src/mux/libwebpmux.rc
+++ b/src/mux/libwebpmux.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,0,0
- PRODUCTVERSION 1,0,0,0
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Google, Inc."
             VALUE "FileDescription", "libwebpmux DLL"
-            VALUE "FileVersion", "1.0.0"
+            VALUE "FileVersion", "1.0.1"
             VALUE "InternalName", "libwebpmux.dll"
             VALUE "LegalCopyright", "Copyright (C) 2018"
             VALUE "OriginalFilename", "libwebpmux.dll"
             VALUE "ProductName", "WebP Image Muxer"
-            VALUE "ProductVersion", "1.0.0"
+            VALUE "ProductVersion", "1.0.1"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/src/mux/muxi.h b/src/mux/muxi.h
index 25877f61..df9f74c6 100644
--- a/src/mux/muxi.h
+++ b/src/mux/muxi.h
@@ -29,7 +29,7 @@ extern "C" {
 
 #define MUX_MAJ_VERSION 1
 #define MUX_MIN_VERSION 0
-#define MUX_REV_VERSION 0
+#define MUX_REV_VERSION 1
 
 // Chunk object.
 typedef struct WebPChunk WebPChunk;

From 4cbb4caf49675e47176fa3543c9e1d400ea77e42 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 2 Nov 2018 23:02:11 -0700
Subject: [PATCH 08/12] update NEWS

Change-Id: I4a97342d47247724f12da9d8a7d8f22047c2a179
---
 NEWS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/NEWS b/NEWS
index 480cb7d3..af07d11b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,15 @@
+- 11/2/2018: version 1.0.1
+  This is a binary compatible release.
+  * lossless encoder speedups
+  * big-endian fix for alpha decoding (issue #393)
+  * gif2webp fix for loop count=65535 transcode (issue #382)
+  * further security related hardening in libwebp & libwebpmux
+    (issues #383, #385, #386, #387, #388, #391)
+    (oss-fuzz #9099, #9100, #9105, #9106, #9111, #9112, #9119, #9123, #9170,
+              #9178, #9179, #9183, #9186, #9191, #9364, #9417, #9496, #10349,
+              #10423, #10634, #10700, #10838, #10922, #11021, #11088, #11152)
+  * miscellaneous bug & build fixes (issues #381, #394, #396, #397, #400)
+
 - 4/2/2018: version 1.0.0
   This is a binary compatible release.
   * lossy encoder improvements to avoid chroma shifts in various circumstances

From 3be698c3d3b772e4730276294de1e37b801b6e2f Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Tue, 6 Nov 2018 15:24:24 +0100
Subject: [PATCH 09/12] CMake: fix webp_js compilation

Stick to the strict minimum necessary for running webp_js,
and avoid building sub-lib or examples with heavy dependencies.

Change-Id: Ife4170a7839fb3201b2cf158d98d17bebe10008f
(cherry picked from commit 4cd0582d50eaa79140723c180fc6bfd3891b07f5)
---
 CMakeLists.txt | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ccd1b48b..61973c6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,6 +20,15 @@ option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces."
 
 if(WEBP_BUILD_WEBP_JS)
   set(WEBP_ENABLE_SIMD OFF)
+  set(WEBP_BUILD_ANIM_UTILS OFF)
+  set(WEBP_BUILD_CWEBP OFF)
+  set(WEBP_BUILD_DWEBP OFF)
+  set(WEBP_BUILD_GIF2WEBP OFF)
+  set(WEBP_BUILD_IMG2WEBP OFF)
+  set(WEBP_BUILD_VWEBP OFF)
+  set(WEBP_BUILD_WEBPINFO OFF)
+  set(WEBP_BUILD_WEBPMUX OFF)
+  set(WEBP_BUILD_EXTRAS OFF)
 endif()
 
 set(WEBP_DEP_LIBRARIES)
@@ -317,8 +326,7 @@ if(WEBP_BUILD_ANIM_UTILS
    OR WEBP_BUILD_DWEBP
    OR WEBP_BUILD_GIF2WEBP
    OR WEBP_BUILD_IMG2WEBP
-   OR WEBP_BUILD_VWEBP
-   OR WEBP_BUILD_WEBP_JS)
+   OR WEBP_BUILD_VWEBP)
   # Example utility library.
   parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "EXAMPLEUTIL_SRCS"
                     "example_util_[^ ]*")

From 825389acba39320f7e954e010f1d69ec34b0695e Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 6 Nov 2018 18:47:38 -0800
Subject: [PATCH 10/12] README.mux: add a reference to the AnimDecoder API

this balances the AnimEncoder section

Change-Id: I205c8d0bd6104509e06737dcbf9a7651fd4bc6a3
---
 README.mux | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/README.mux b/README.mux
index ef705af3..fdcf4909 100644
--- a/README.mux
+++ b/README.mux
@@ -211,6 +211,35 @@ Code example:
 For a detailed AnimEncoder API reference, please refer to the header file
 (src/webp/mux.h).
 
+AnimDecoder API:
+================
+This AnimDecoder API allows decoding (possibly) animated WebP images.
+
+Code Example:
+
+  WebPAnimDecoderOptions dec_options;
+  WebPAnimDecoderOptionsInit(&dec_options);
+  // Tune 'dec_options' as needed.
+  WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
+  WebPAnimInfo anim_info;
+  WebPAnimDecoderGetInfo(dec, &anim_info);
+  for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
+    while (WebPAnimDecoderHasMoreFrames(dec)) {
+      uint8_t* buf;
+      int timestamp;
+      WebPAnimDecoderGetNext(dec, &buf, &timestamp);
+      // ... (Render 'buf' based on 'timestamp').
+      // ... (Do NOT free 'buf', as it is owned by 'dec').
+    }
+    WebPAnimDecoderReset(dec);
+  }
+  const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
+  // ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
+  WebPAnimDecoderDelete(dec);
+
+For a detailed AnimDecoder API reference, please refer to the header file
+(src/webp/demux.h).
+
 
 Bugs:
 =====

From fa8210e43ca559607bead50d1b931e8c45453ad4 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Wed, 7 Nov 2018 00:33:14 +0100
Subject: [PATCH 11/12] Fix pair update in stochastic entropy merging.

The old code simply did not make sense.
The effect is that the pair would be popped from the
queue no matter what; as the queue is small, it does
not matter that much on the results.
But it will matter for a later CL.

Change-Id: If50c9fa9d7f3ac3c48bb7336d81479287d4944c4
(cherry picked from commit 485ff86fbb174b518834503cb39c9c8e20567f38)
---
 src/enc/histogram_enc.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c
index 28a3e6e6..4e49e0a2 100644
--- a/src/enc/histogram_enc.c
+++ b/src/enc/histogram_enc.c
@@ -756,6 +756,18 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
   }
 }
 
+// Update the cost diff and combo of a pair of histograms. This needs to be
+// called when the histograms have been merged with a third one.
+static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
+                                 const VP8LHistogram* const h2,
+                                 double threshold,
+                                 HistogramPair* const pair) {
+  const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
+  pair->cost_combo = 0.;
+  GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
+  pair->cost_diff = pair->cost_combo - sum_cost;
+}
+
 // Create a pair from indices "idx1" and "idx2" provided its cost
 // is inferior to "threshold", a negative entropy.
 // It returns the cost of the pair, or 0. if it superior to threshold.
@@ -765,7 +777,6 @@ static double HistoQueuePush(HistoQueue* const histo_queue,
   const VP8LHistogram* h1;
   const VP8LHistogram* h2;
   HistogramPair pair;
-  double sum_cost;
 
   assert(threshold <= 0.);
   if (idx1 > idx2) {
@@ -777,10 +788,8 @@ static double HistoQueuePush(HistoQueue* const histo_queue,
   pair.idx2 = idx2;
   h1 = histograms[idx1];
   h2 = histograms[idx2];
-  sum_cost = h1->bit_cost_ + h2->bit_cost_;
-  pair.cost_combo = 0.;
-  GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair.cost_combo);
-  pair.cost_diff = pair.cost_combo - sum_cost;
+
+  HistoQueueUpdatePair(h1, h2, threshold, &pair);
 
   // Do not even consider the pair if it does not improve the entropy.
   if (pair.cost_diff >= threshold) return 0.;
@@ -973,8 +982,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
       }
       if (do_eval) {
         // Re-evaluate the cost of an updated pair.
-        GetCombinedHistogramEntropy(histograms[p->idx1], histograms[p->idx2], 0,
-                                    &p->cost_diff);
+        HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0., p);
         if (p->cost_diff >= 0.) {
           HistoQueuePopPair(&histo_queue, p);
           continue;

From e85d3313d6d52b1e9c6c181b488fc0831a747de8 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Sat, 3 Nov 2018 11:31:29 -0700
Subject: [PATCH 12/12] update ChangeLog

Change-Id: I39043d4986664312947a0668cb1b7bfbcf5a2477
---
 ChangeLog | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 9fd9acfe..aa3cdab8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,23 +1,116 @@
+fa8210e4 Fix pair update in stochastic entropy merging.
+825389ac README.mux: add a reference to the AnimDecoder API
+3be698c3 CMake: fix webp_js compilation
+4cbb4caf update NEWS
+f5a5918d bump version to 1.0.1
+d61385db Speed-up: Make sure we only initialize histograms when needed.
+0c570316 update AUTHORS
+301a2dda img2webp: add help note about arguments from a file
+f0abab92 Speedups for empty histograms.
+f2dfd925 Split HistogramAdd to only have the high level logic in C.
+06b7bc7d Fix compilation on windows and clang-cl+ninja.
+98179495 remove some minor TODOs
+cbf82cc0 Remove AVX2 files.
+5030e902 Merge "TIFF decoder: remove unused KINV definition"
+ac543311 Remove a few more useless #defines
+123d3306 TIFF decoder: remove unused KINV definition
+ef1094b0 Merge "- install pkg-config files during the CMake build"
+b911fbc9 libwebp: Remove duplicate GIFDisplayError in anim_util
+eee00b66 - install pkg-config files during the CMake build
+ac3ec8c9 Merge "Clean-up the common sources in dsp."
+3e13da7b Clean-up the common sources in dsp.
+5c395f1d libwebp: cmake-format all
+e7a69729 libwebp: Add extras targets in CMakeLists.txt
+e52485d6 libwebp: Rename macros in webpmux.c
+92dc0f09 clean-up MakeInputImageCopy()
+39952de2 VP8IteratorImport: add missing 'const'
+382af7a2 clean-up WebPBlendAlpha
+14d020f6 libwebp: Use ExUtilGet*() in anim_diff
+0d92ff25 libwebp: remove useless variable in gif2webp
+556cb1b4 Merge "CMake: Set WEBP_BUILD_GIF2WEBP to off"
+da26ee49 CMake: Set WEBP_BUILD_GIF2WEBP to off
+b2a867c0 cwebp: Don't premultiply during -resize if -exact
+637141bc pngdec: fix build w/libpng < 1.4.x
+bc5092b1 pngdec: set memory functions
+50d8345a Fix CMake math library.
+6aa3e8aa Fix math library on Visual Studio.
+d71df4e2 Fix math library finding in CMake.
+de08d727 cosmetics: normalize include guard comment
+009562b4 vwebp: Fix bug when Dispose then NoBlend frames
+423f2579 Fix up CMake to create targets.
+907208f9 Wait for all threads to be done in DecodeRemaining.
+4649b3c4 vwebp: Add background color display option
+78ad57a3 Fix bad glClearColor parameters
+da96d8d9 Allow for a non-initialized alpha decompressor in DoRemap.
+2563db47 fix rescaling rounding inaccuracy
+211f37ee fix endian problems in pattern copy
+5f0f5c07 Make sure partition #0 is read before VP8 data in IDecode.
+de98732b fix GetColorf() bug
+4338cd36 misc fixes in libwebpmux
+e00af13e fix signatures after a9ceda7ff1
+a9ceda7f Speed-up chunk list operations.
+2281bbf6 Merge "Better handling of bogus Huffman codes."
+39cb9aad Better handling of bogus Huffman codes.
+89cc9d37 Merge "fix read-overflow while parsing VP8X chunk"
+95fd6507 fix read-overflow while parsing VP8X chunk
+9e729fe1 Fix VP8IoTeardownHook being called twice on worker sync failure
+29fb8562 Merge "muxread,anmf: fail on multiple image chunks"
+eb82ce76 muxread,anmf: fail on multiple image chunks
+1344a2e9 fix alpha-filtering crash when image width is larger than radius
+be738c6d muxread,ChunkVerifyAndAssign: validate chunk_size
+2c70ad76 muxread,CreateInternal: fix riff size checks
+569001f1 Fix for thread race heap-use-after-free
+c56a02d9 Android.mk: use LOCAL_EXPORT_C_INCLUDES w/public libs
+15795596 CMakeLists.txt,cosmetics: normalize if() formatting
+1a44c233 Merge "cmake: add support for webpmux"
+e9569ad7 Merge "configure,*am,cosmetics: s/WANT_/BUILD_/"
+35c7de6f cmake: add support for webpmux
+0f25e61c WebpToSDL(): fix the return value in case of error
+5d8985de configure,*am,cosmetics: s/WANT_/BUILD_/
+895fd28f Merge "man/Makefile.am: add img2webp.1"
+5cf3e2af man/Makefile.am: add img2webp.1
+2a9de5b9 Add build rules for anim_diff & anim_dump utils.
+71ed73cf fix invalid check for buffer size
+af0e4fbb gif2webp: fix transcode of loop count=65535
+dce5d764 Limit memory allocation when reading invalid Huffman codes.
+f9df0081 Merge "cmake: quiet glut deprecation warnings on OS X"
+dc39b16f webpmux.1: correct grammar
+c7aa1264 cwebp.c: fix a missing \n
+53aa51e9 Merge tag 'v1.0.0'
+698b8844 update ChangeLog (tag: v1.0.0)
 8d510751 webp-container-spec: correct frame duration=0 note
 e6b2164e vwebp: Copy Chrome's behavior w/frame duration == 0
+094b3b28 cmake: quiet glut deprecation warnings on OS X
+71c39a06 webp-container-spec: correct frame duration=0 note
+fd3d5756 vwebp: Copy Chrome's behavior w/frame duration == 0
+b0c966fb Build vwebp from CMake.
 d20b7707 update ChangeLog (tag: v1.0.0-rc3)
 0d5fad46 add WEBP_DSP_INIT / WEBP_DSP_INIT_FUNC
+d77bf512 add WEBP_DSP_INIT / WEBP_DSP_INIT_FUNC
 c1cb86af fix 16b overflow in SSE2
 e577feb7 makefile.unix: add DEBUG flag for compiling w/ debug-symbol
 99be34b3 cwebp,get_disto: fix bpp output
+e122e511 cwebp,get_disto: fix bpp output
 f5565ca8 cmake: Make sure we use near-lossless by default.
 d898dc14 fix bug in WebPImport565: alpha value was not set
+1c8f358d Fix CMake with WASM.
+a0215fb7 webp_js: fix webp_js demo html
 882784b0 update ChangeLog (tag: v1.0.0-rc2)
 2f930e08 Revert "Use proper targets for CMake."
 8165e8fb Use proper targets for CMake.
 3f157dd5 Remove some very hard TODOs.
+abb47760 Merge "Use proper targets for CMake."
 cd758a17 {de,}mux/Makefile.am: add missing headers
+e155dda0 Use proper targets for CMake.
 b892b8ba makefile.unix,dist: use ascii for text output
 64a57d05 add -version option to anim_dump,anim_diff and img2webp
+994be82d Merge "Remove some very hard TODOs."
+4033e1d7 Remove some very hard TODOs.
 fc1b8e3a webp_js: fix webp_js demo html
 15aa48d9 update ChangeLog (tag: v1.0.0-rc1)
 e607dabc update AUTHORS
 38410c08 [CFI] Remove function pointer casts
+978eec25 [CFI] Remove function pointer casts
 c57b2736 bump version to 1.0.0
 cba28853 update NEWS
 c909d531 Merge "remove some deprecation warning on MacOSX"