Merge "make VP8LPredictor[01]_C() static" into main

Merge changes Ie43dc5ef,I94cd8bab into main
* changes: Do*Filter_*: remove row & num_rows parameters Do*Filter_C: remove dead 'inverse' code paths
2025-07-15 21:39:59 +02:00 · 2024-08-22 17:35:52 +00:00 · 2024-08-19 18:51:06 +00:00 · 2024-08-16 10:58:45 -07:00 · 2024-08-12 19:36:31 -07:00 · 2024-08-08 18:13:48 -07:00
92 changed files with 2604 additions and 1557 deletions
--- a/.gitignore
+++ b/.gitignore
@ -52,5 +52,6 @@ tests/fuzzer/animdecoder_fuzzer
 tests/fuzzer/animencoder_fuzzer
 tests/fuzzer/demux_api_fuzzer
 tests/fuzzer/enc_dec_fuzzer
+tests/fuzzer/huffman_fuzzer
 tests/fuzzer/mux_demux_api_fuzzer
 tests/fuzzer/simple_api_fuzzer
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -45,6 +45,7 @@ option(WEBP_BUILD_LIBWEBPMUX "Build the libwebpmux library." ON)
 option(WEBP_BUILD_WEBPMUX "Build the webpmux command line tool." ON)
 option(WEBP_BUILD_EXTRAS "Build extras." ON)
 option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
+option(WEBP_BUILD_FUZZTEST "Build the fuzztest tests." OFF)
 option(WEBP_USE_THREAD "Enable threading support" ON)
 option(WEBP_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
 option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces."
@ -375,9 +376,11 @@ if(XCODE)
 endif()
 target_link_libraries(webpdecoder ${WEBP_DEP_LIBRARIES})
 target_include_directories(
-  webpdecoder PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
-  INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
-            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+  webpdecoder
+  PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
+  INTERFACE
+    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR};${CMAKE_CURRENT_BINARY_DIR}>"
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
 set_target_properties(
  webpdecoder
  PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
@ -479,6 +482,7 @@ if(WEBP_BUILD_ANIM_UTILS
   OR WEBP_BUILD_CWEBP
   OR WEBP_BUILD_DWEBP
   OR WEBP_BUILD_EXTRAS
+   OR WEBP_BUILD_FUZZTEST
   OR WEBP_BUILD_GIF2WEBP
   OR WEBP_BUILD_IMG2WEBP
   OR WEBP_BUILD_VWEBP
@ -771,6 +775,10 @@ if(WEBP_BUILD_ANIM_UTILS)
  target_include_directories(anim_dump PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
 endif()

+if(WEBP_BUILD_FUZZTEST)
+  add_subdirectory(tests/fuzzer)
+endif()
+
 # Install the different headers and libraries.
 install(
  TARGETS ${INSTALLED_LIBRARIES}
--- a/ChangeLog
+++ b/ChangeLog
@ -1,3 +1,6 @@
+8a6a55bb update NEWS
+cf7c5a5d provide a way to opt-out/override WEBP_NODISCARD
+cc34288a update ChangeLog (tag: v1.4.0-rc1)
 f13c0886 NEWS: fix date
 74555950 Merge "vwebp: fix window title when options are given" into 1.4.0
 d781646c vwebp: fix window title when options are given
--- a/NEWS
+++ b/NEWS
@ -1,4 +1,4 @@
- 4/2/2024: version 1.4.0
+- 4/12/2024: version 1.4.0
  This is a binary compatible release.
  * API changes:
    - libwebpmux: WebPAnimEncoderSetChunk, WebPAnimEncoderGetChunk,
@ -7,6 +7,8 @@
    - extras: SharpYuvEstimate420Risk
  * further security related hardening in libwebp & examples
  * some minor optimizations in the lossless encoder
+  * added WEBP_NODISCARD to report unused result warnings; enable with
+    -DWEBP_ENABLE_NODISCARD=1
  * improvements and corrections in webp-container-spec.txt and
    webp-lossless-bitstream-spec.txt (#611)
  * miscellaneous warning, bug & build fixes (#615, #619, #632, #635)
--- a/README.md
+++ b/README.md
@ -42,7 +42,7 @@ See the [APIs documentation](doc/api.md), and API usage examples in the

 ## Bugs

-Please report all bugs to the issue tracker: https://bugs.chromium.org/p/webp
+Please report all bugs to the issue tracker: https://issues.webmproject.org

 Patches welcome! See [how to contribute](CONTRIBUTING.md).

--- a/configure.ac
+++ b/configure.ac
@ -1,5 +1,5 @@
 AC_INIT([libwebp], [1.4.0],
-        [https://bugs.chromium.org/p/webp],,
+        [https://issues.webmproject.org],,
        [https://developers.google.com/speed/webp])
 AC_CANONICAL_HOST
 AC_PREREQ([2.60])
--- a/doc/building.md
+++ b/doc/building.md
@ -228,4 +228,4 @@ generated code, but is untested.
 ## Javascript decoder

 Libwebp can be compiled into a JavaScript decoder using Emscripten and CMake.
-See the [corresponding documentation](../README.md)
+See the [corresponding documentation](../webp_js/README.md)
--- a/doc/webp-container-spec.txt
+++ b/doc/webp-container-spec.txt
@ -781,7 +781,8 @@ _VP8X.field_ means the field in the 'VP8X' Chunk with the same description.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 VP8X.flags.hasAnimation MUST be TRUE
 canvas ← new image of size VP8X.canvasWidth x VP8X.canvasHeight with
-         background color ANIM.background_color.
+         background color ANIM.background_color or
+         application-defined color.
 loop_count ← ANIM.loopCount
 dispose_method ← Dispose to background color
 if loop_count == 0:
@ -809,6 +810,7 @@ for loop = 0..loop_count - 1
        bitstream subchunks not found in 'Frame Data' earlier MUST
          be TRUE
        frame_params.bitstream = bitstream_data
+    apply dispose_method.
    render frame with frame_params.alpha and frame_params.bitstream
      on canvas with top-left corner at (frame_params.frameX,
      frame_params.frameY), using Blending method
--- a/doc/webp-lossless-bitstream-spec.txt
+++ b/doc/webp-lossless-bitstream-spec.txt
@ -351,7 +351,7 @@ int ClampAddSubtractHalf(int a, int b) {
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 There are special handling rules for some border pixels. If there is a
-prediction transform, regardless of the mode \[0..13\] for these pixels, the
+predictor transform, regardless of the mode \[0..13\] for these pixels, the
 predicted value for the left-topmost pixel of the image is 0xff000000, all
 pixels on the top row are L-pixel, and all pixels on the leftmost column are
 T-pixel.
--- a/examples/anim_diff.c
+++ b/examples/anim_diff.c
@ -16,7 +16,7 @@
 #include <assert.h>
 #include <limits.h>
 #include <stdio.h>
-#include <stdlib.h>  // for 'strtod'.
+#include <stdlib.h>
 #include <string.h>  // for 'strcmp'.

 #include "./anim_util.h"
@ -206,8 +206,9 @@ static void Help(void) {
  printf("  -version ............ print version number and exit\n");
 }

+// Returns 0 on success, 1 if animation files differ, and 2 for any error.
 int main(int argc, const char* argv[]) {
-  int return_code = -1;
+  int return_code = 2;
  int dump_frames = 0;
  const char* dump_folder = NULL;
  double min_psnr = 0.;
@ -269,18 +270,18 @@ int main(int argc, const char* argv[]) {
    }
    if (parse_error) {
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(return_code);
    }
  }
  if (argc < 3) {
    Help();
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(return_code);
  }


  if (!got_input2) {
    Help();
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(return_code);
  }

  if (dump_frames) {
@ -293,7 +294,7 @@ int main(int argc, const char* argv[]) {
    if (!ReadAnimatedImage(files[i], &images[i], dump_frames, dump_folder)) {
      WFPRINTF(stderr, "Error decoding file: %s\n Aborting.\n",
               (const W_CHAR*)files[i]);
-      return_code = -2;
+      return_code = 2;
      goto End;
    } else {
      MinimizeAnimationFrames(&images[i], max_diff);
@ -304,7 +305,7 @@ int main(int argc, const char* argv[]) {
                                premultiply, min_psnr)) {
    WFPRINTF(stderr, "\nFiles %s and %s differ.\n", (const W_CHAR*)files[0],
             (const W_CHAR*)files[1]);
-    return_code = -3;
+    return_code = 1;
  } else {
    WPRINTF("\nFiles %s and %s are identical.\n", (const W_CHAR*)files[0],
            (const W_CHAR*)files[1]);
--- a/examples/anim_dump.c
+++ b/examples/anim_dump.c
@ -12,6 +12,7 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>  // for 'strcmp'.

 #include "./anim_util.h"
@ -35,6 +36,7 @@ static void Help(void) {
  printf("  -version ............ print version number and exit\n");
 }

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int error = 0;
  const W_CHAR* dump_folder = TO_W_CHAR(".");
@ -47,7 +49,7 @@ int main(int argc, const char* argv[]) {

  if (argc < 2) {
    Help();
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  for (c = 1; !error && c < argc; ++c) {
@ -73,7 +75,7 @@ int main(int argc, const char* argv[]) {
      suffix = TO_W_CHAR("pam");
    } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
      Help();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-version")) {
      int dec_version, demux_version;
      GetAnimatedImageVersions(&dec_version, &demux_version);
@ -82,7 +84,7 @@ int main(int argc, const char* argv[]) {
             (dec_version >> 0) & 0xff,
             (demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
             (demux_version >> 0) & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {
      uint32_t i;
      AnimatedImage image;
@ -121,5 +123,5 @@ int main(int argc, const char* argv[]) {
      ClearAnimatedImage(&image);
    }
  }
-  FREE_WARGV_AND_RETURN(error ? 1 : 0);
+  FREE_WARGV_AND_RETURN(error ? EXIT_FAILURE : EXIT_SUCCESS);
 }
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@ -178,8 +178,14 @@ static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
    if (stats->lossless_features & 8) fprintf(stderr, " PALETTE");
    fprintf(stderr, "\n");
  }
-  fprintf(stderr, "  * Precision Bits: histogram=%d transform=%d cache=%d\n",
-          stats->histogram_bits, stats->transform_bits, stats->cache_bits);
+  fprintf(stderr, "  * Precision Bits: histogram=%d", stats->histogram_bits);
+  if (stats->lossless_features & 1) {
+    fprintf(stderr, " prediction=%d", stats->transform_bits);
+  }
+  if (stats->lossless_features & 2) {
+    fprintf(stderr, " cross-color=%d", stats->cross_color_transform_bits);
+  }
+  fprintf(stderr, " cache=%d\n", stats->cache_bits);
  if (stats->palette_size > 0) {
    fprintf(stderr, "  * Palette size:   %d\n", stats->palette_size);
  }
@ -651,8 +657,9 @@ static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {

 //------------------------------------------------------------------------------

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
-  int return_value = -1;
+  int return_value = EXIT_FAILURE;
  const char* in_file = NULL, *out_file = NULL, *dump_file = NULL;
  FILE* out = NULL;
  int c;
@ -686,22 +693,22 @@ int main(int argc, const char* argv[]) {
      !WebPPictureInit(&original_picture) ||
      !WebPConfigInit(&config)) {
    fprintf(stderr, "Error! Version mismatch!\n");
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  if (argc == 1) {
    HelpShort();
-    FREE_WARGV_AND_RETURN(0);
+    FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
  }

  for (c = 1; c < argc; ++c) {
    int parse_error = 0;
    if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
      HelpShort();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-H") || !strcmp(argv[c], "-longhelp")) {
      HelpLong();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-o") && c + 1 < argc) {
      out_file = (const char*)GET_WARGV(argv, ++c);
    } else if (!strcmp(argv[c], "-d") && c + 1 < argc) {
@ -842,7 +849,7 @@ int main(int argc, const char* argv[]) {
      printf("libsharpyuv: %d.%d.%d\n",
             (sharpyuv_version >> 24) & 0xff, (sharpyuv_version >> 16) & 0xffff,
             sharpyuv_version & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-progress")) {
      show_progress = 1;
    } else if (!strcmp(argv[c], "-quiet")) {
@ -904,7 +911,7 @@ int main(int argc, const char* argv[]) {
        if (i == kNumTokens) {
          fprintf(stderr, "Error! Unknown metadata type '%.*s'\n",
                  (int)(token - start), start);
-          FREE_WARGV_AND_RETURN(-1);
+          FREE_WARGV_AND_RETURN(EXIT_FAILURE);
        }
        start = token + 1;
      }
@ -923,14 +930,14 @@ int main(int argc, const char* argv[]) {
    } else if (argv[c][0] == '-') {
      fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
      HelpLong();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    } else {
      in_file = (const char*)GET_WARGV(argv, c);
    }

    if (parse_error) {
      HelpLong();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    }
  }
  if (in_file == NULL) {
@ -1231,7 +1238,7 @@ int main(int argc, const char* argv[]) {
      PrintMetadataInfo(&metadata, metadata_written);
    }
  }
-  return_value = 0;
+  return_value = EXIT_SUCCESS;

 Error:
  WebPMemoryWriterClear(&memory_writer);
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@ -177,6 +177,7 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
  return external_buffer;
 }

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int ok = 0;
  const char* in_file = NULL;
@ -197,14 +198,14 @@ int main(int argc, const char* argv[]) {

  if (!WebPInitDecoderConfig(&config)) {
    fprintf(stderr, "Library version mismatch!\n");
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  for (c = 1; c < argc; ++c) {
    int parse_error = 0;
    if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
      Help();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-o") && c < argc - 1) {
      out_file = (const char*)GET_WARGV(argv, ++c);
    } else if (!strcmp(argv[c], "-alpha")) {
@ -227,7 +228,7 @@ int main(int argc, const char* argv[]) {
      const int version = WebPGetDecoderVersion();
      printf("%d.%d.%d\n",
             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-pgm")) {
      format = PGM;
    } else if (!strcmp(argv[c], "-yuv")) {
@ -293,21 +294,21 @@ int main(int argc, const char* argv[]) {
    } else if (argv[c][0] == '-') {
      fprintf(stderr, "Unknown option '%s'\n", argv[c]);
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    } else {
      in_file = (const char*)GET_WARGV(argv, c);
    }

    if (parse_error) {
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    }
  }

  if (in_file == NULL) {
    fprintf(stderr, "missing input file!!\n");
    Help();
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  if (quiet) verbose = 0;
@ -316,7 +317,7 @@ int main(int argc, const char* argv[]) {
    VP8StatusCode status = VP8_STATUS_OK;
    size_t data_size = 0;
    if (!LoadWebP(in_file, &data, &data_size, bitstream)) {
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    }

    switch (format) {
@ -415,7 +416,7 @@ int main(int argc, const char* argv[]) {
  WebPFreeDecBuffer(output_buffer);
  WebPFree((void*)external_buffer);
  WebPFree((void*)data);
-  FREE_WARGV_AND_RETURN(ok ? 0 : -1);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }

 //------------------------------------------------------------------------------
--- a/examples/gif2webp.c
+++ b/examples/gif2webp.c
@ -96,6 +96,7 @@ static void Help(void) {

 //------------------------------------------------------------------------------

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int verbose = 0;
  int gif_error = GIF_ERROR;
@ -140,7 +141,7 @@ int main(int argc, const char* argv[]) {
      !WebPPictureInit(&frame) || !WebPPictureInit(&curr_canvas) ||
      !WebPPictureInit(&prev_canvas)) {
    fprintf(stderr, "Error! Version mismatch!\n");
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }
  config.lossless = 1;  // Use lossless compression by default.

@ -150,14 +151,14 @@ int main(int argc, const char* argv[]) {

  if (argc == 1) {
    Help();
-    FREE_WARGV_AND_RETURN(0);
+    FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
  }

  for (c = 1; c < argc; ++c) {
    int parse_error = 0;
    if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
      Help();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-o") && c < argc - 1) {
      out_file = GET_WARGV(argv, ++c);
    } else if (!strcmp(argv[c], "-lossy")) {
@ -216,7 +217,7 @@ int main(int argc, const char* argv[]) {
          fprintf(stderr, "Error! Unknown metadata type '%.*s'\n",
                  (int)(token - start), start);
          Help();
-          FREE_WARGV_AND_RETURN(-1);
+          FREE_WARGV_AND_RETURN(EXIT_FAILURE);
        }
        start = token + 1;
      }
@ -229,7 +230,7 @@ int main(int argc, const char* argv[]) {
             (enc_version >> 16) & 0xff, (enc_version >> 8) & 0xff,
             enc_version & 0xff, (mux_version >> 16) & 0xff,
             (mux_version >> 8) & 0xff, mux_version & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-quiet")) {
      quiet = 1;
      enc_options.verbose = 0;
@ -242,14 +243,14 @@ int main(int argc, const char* argv[]) {
    } else if (argv[c][0] == '-') {
      fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    } else {
      in_file = GET_WARGV(argv, c);
    }

    if (parse_error) {
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    }
  }

@ -593,7 +594,7 @@ int main(int argc, const char* argv[]) {
 #endif
  }

-  FREE_WARGV_AND_RETURN(!ok);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }

 #else  // !WEBP_HAVE_GIF
@ -601,7 +602,7 @@ int main(int argc, const char* argv[]) {
 int main(int argc, const char* argv[]) {
  fprintf(stderr, "GIF support not enabled in %s.\n", argv[0]);
  (void)argc;
-  return 0;
+  return EXIT_FAILURE;
 }

 #endif
--- a/examples/img2webp.c
+++ b/examples/img2webp.c
@ -130,6 +130,7 @@ static int SetLoopCount(int loop_count, WebPData* const webp_data) {

 //------------------------------------------------------------------------------

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  const char* output = NULL;
  WebPAnimEncoder* enc = NULL;
@ -151,7 +152,7 @@ int main(int argc, const char* argv[]) {
  INIT_WARGV(argc, argv);

  ok = ExUtilInitCommandLineArguments(argc - 1, argv + 1, &cmd_args);
-  if (!ok) FREE_WARGV_AND_RETURN(1);
+  if (!ok) FREE_WARGV_AND_RETURN(EXIT_FAILURE);

  argc = cmd_args.argc_;
  argv = cmd_args.argv_;
@ -199,7 +200,7 @@ int main(int argc, const char* argv[]) {
        verbose = 1;
      } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
        Help();
-        FREE_WARGV_AND_RETURN(0);
+        FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
      } else if (!strcmp(argv[c], "-version")) {
        const int enc_version = WebPGetEncoderVersion();
        const int mux_version = WebPGetMuxVersion();
@ -335,5 +336,5 @@ int main(int argc, const char* argv[]) {
  }
  WebPDataClear(&webp_data);
  ExUtilDeleteCommandLineArguments(&cmd_args);
-  FREE_WARGV_AND_RETURN(ok ? 0 : 1);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
--- a/examples/vwebp.c
+++ b/examples/vwebp.c
@ -506,7 +506,7 @@ int main(int argc, char* argv[]) {

  if (!WebPInitDecoderConfig(config)) {
    fprintf(stderr, "Library version mismatch!\n");
-    FREE_WARGV_AND_RETURN(-1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }
  config->options.dithering_strength = 50;
  config->options.alpha_dithering_strength = 100;
@ -518,7 +518,7 @@ int main(int argc, char* argv[]) {
    int parse_error = 0;
    if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
      Help();
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-noicc")) {
      kParams.use_color_profile = 0;
    } else if (!strcmp(argv[c], "-nofancy")) {
@ -541,7 +541,7 @@ int main(int argc, char* argv[]) {
             (dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
             dec_version & 0xff, (dmux_version >> 16) & 0xff,
             (dmux_version >> 8) & 0xff, dmux_version & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-mt")) {
      config->options.use_threads = 1;
    } else if (!strcmp(argv[c], "--")) {
@ -553,7 +553,7 @@ int main(int argc, char* argv[]) {
    } else if (argv[c][0] == '-') {
      printf("Unknown option '%s'\n", argv[c]);
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    } else {
      kParams.file_name = (const char*)GET_WARGV(argv, c);
      file_name_argv_index = c;
@ -561,14 +561,14 @@ int main(int argc, char* argv[]) {

    if (parse_error) {
      Help();
-      FREE_WARGV_AND_RETURN(-1);
+      FREE_WARGV_AND_RETURN(EXIT_FAILURE);
    }
  }

  if (kParams.file_name == NULL) {
    printf("missing input file!!\n");
    Help();
-    FREE_WARGV_AND_RETURN(0);
+    FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
  }

  if (!ImgIoUtilReadFile(kParams.file_name,
@ -643,11 +643,11 @@ int main(int argc, char* argv[]) {

  // Should only be reached when using FREEGLUT:
  ClearParams();
-  FREE_WARGV_AND_RETURN(0);
+  FREE_WARGV_AND_RETURN(EXIT_SUCCESS);

 Error:
  ClearParams();
-  FREE_WARGV_AND_RETURN(-1);
+  FREE_WARGV_AND_RETURN(EXIT_FAILURE);
 }

 #else   // !WEBP_HAVE_GL
@ -655,7 +655,7 @@ int main(int argc, char* argv[]) {
 int main(int argc, const char* argv[]) {
  fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);
  (void)argc;
-  return 0;
+  return EXIT_FAILURE;
 }

 #endif
--- a/examples/webpinfo.c
+++ b/examples/webpinfo.c
@ -14,6 +14,7 @@

 #include <assert.h>
 #include <stdio.h>
+#include <stdlib.h>

 #ifdef HAVE_CONFIG_H
 #include "webp/config.h"
@ -1120,6 +1121,7 @@ static void Help(void) {
         "  -bitstream_info .... Parse bitstream header.\n");
 }

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int c, quiet = 0, show_diag = 0, show_summary = 0;
  int parse_bitstream = 0;
@ -1130,7 +1132,7 @@ int main(int argc, const char* argv[]) {

  if (argc == 1) {
    Help();
-    FREE_WARGV_AND_RETURN(WEBP_INFO_OK);
+    FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
  }

  // Parse command-line input.
@ -1138,7 +1140,7 @@ int main(int argc, const char* argv[]) {
    if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help") ||
        !strcmp(argv[c], "-H") || !strcmp(argv[c], "-longhelp")) {
      Help();
-      FREE_WARGV_AND_RETURN(WEBP_INFO_OK);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-quiet")) {
      quiet = 1;
    } else if (!strcmp(argv[c], "-diag")) {
@ -1151,7 +1153,7 @@ int main(int argc, const char* argv[]) {
      const int version = WebPGetDecoderVersion();
      printf("WebP Decoder version: %d.%d.%d\n",
             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {  // Assume the remaining are all input files.
      break;
    }
@ -1159,7 +1161,7 @@ int main(int argc, const char* argv[]) {

  if (c == argc) {
    Help();
-    FREE_WARGV_AND_RETURN(WEBP_INFO_INVALID_COMMAND);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  // Process input files one by one.
@ -1182,5 +1184,6 @@ int main(int argc, const char* argv[]) {
    webp_info_status = AnalyzeWebP(&webp_info, &webp_data);
    WebPDataClear(&webp_data);
  }
-  FREE_WARGV_AND_RETURN(webp_info_status);
+  FREE_WARGV_AND_RETURN((webp_info_status == WEBP_INFO_OK) ? EXIT_SUCCESS
+                                                           : EXIT_FAILURE);
 }
--- a/examples/webpmux.c
+++ b/examples/webpmux.c
@ -59,6 +59,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+
 #include "webp/decode.h"
 #include "webp/mux.h"
 #include "../examples/example_util.h"
@ -1225,6 +1226,7 @@ static int Process(const Config* config) {
 //------------------------------------------------------------------------------
 // Main.

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  Config config;
  int ok;
@ -1238,7 +1240,7 @@ int main(int argc, const char* argv[]) {
    PrintHelp();
  }
  DeleteConfig(&config);
-  FREE_WARGV_AND_RETURN(!ok);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }

 //------------------------------------------------------------------------------
--- a/extras/get_disto.c
+++ b/extras/get_disto.c
@ -227,10 +227,11 @@ static void Help(void) {
          WebPGetEnabledInputFileFormats());
 }

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  WebPPicture pic1, pic2;
  size_t size1 = 0, size2 = 0;
-  int ret = 1;
+  int ret = EXIT_FAILURE;
  float disto[5];
  int type = 0;
  int c;
@ -246,7 +247,7 @@ int main(int argc, const char* argv[]) {

  if (!WebPPictureInit(&pic1) || !WebPPictureInit(&pic2)) {
    fprintf(stderr, "Can't init pictures\n");
-    FREE_WARGV_AND_RETURN(1);
+    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

  for (c = 1; c < argc; ++c) {
@ -262,7 +263,7 @@ int main(int argc, const char* argv[]) {
      use_gray = 1;
    } else if (!strcmp(argv[c], "-h")) {
      help = 1;
-      ret = 0;
+      ret = EXIT_SUCCESS;
    } else if (!strcmp(argv[c], "-o")) {
      if (++c == argc) {
        fprintf(stderr, "missing file name after %s option.\n", argv[c - 1]);
@ -337,7 +338,8 @@ int main(int argc, const char* argv[]) {
      fprintf(stderr, "Error during lossless encoding.\n");
      goto End;
    }
-    ret = ImgIoUtilWriteFile(output, data, data_size) ? 0 : 1;
+    ret = ImgIoUtilWriteFile(output, data, data_size) ? EXIT_SUCCESS
+                                                      : EXIT_FAILURE;
    WebPFree(data);
    if (ret) goto End;
 #else
@ -345,9 +347,10 @@ int main(int argc, const char* argv[]) {
    (void)data_size;
    fprintf(stderr, "Cannot save the difference map. Please recompile "
                    "without the WEBP_REDUCE_CSP flag.\n");
+    goto End;
 #endif  // WEBP_REDUCE_CSP
  }
-  ret = 0;
+  ret = EXIT_SUCCESS;

 End:
  WebPPictureFree(&pic1);
--- a/extras/vwebp_sdl.c
+++ b/extras/vwebp_sdl.c
@ -15,6 +15,7 @@
 // Author: James Zern (jzern@google.com)

 #include <stdio.h>
+#include <stdlib.h>

 #ifdef HAVE_CONFIG_H
 #include "webp/config.h"
@ -49,6 +50,7 @@ static void ProcessEvents(void) {
  }
 }

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, char* argv[]) {
  int c;
  int ok = 0;
@ -61,7 +63,7 @@ int main(int argc, char* argv[]) {
    size_t webp_size = 0;
    if (!strcmp(argv[c], "-h")) {
      printf("Usage: %s [-h] image.webp [more_files.webp...]\n", argv[0]);
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {
      file = (const char*)GET_WARGV(argv, c);
    }
@ -87,7 +89,7 @@ int main(int argc, char* argv[]) {

 Error:
  SDL_Quit();
-  FREE_WARGV_AND_RETURN(ok ? 0 : 1);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }

 #else  // !WEBP_HAVE_SDL
--- a/extras/webp_quality.c
+++ b/extras/webp_quality.c
@ -15,6 +15,7 @@
 #include "imageio/imageio_util.h"
 #include "../examples/unicode.h"

+// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int c;
  int quiet = 0;
@ -27,7 +28,7 @@ int main(int argc, const char* argv[]) {
      quiet = 1;
    } else if (!strcmp(argv[c], "-help") || !strcmp(argv[c], "-h")) {
      printf("webp_quality [-h][-quiet] webp_files...\n");
-      FREE_WARGV_AND_RETURN(0);
+      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {
      const char* const filename = (const char*)GET_WARGV(argv, c);
      const uint8_t* data = NULL;
@ -50,5 +51,5 @@ int main(int argc, const char* argv[]) {
      free((void*)data);
    }
  }
-  FREE_WARGV_AND_RETURN(ok ? 0 : 1);
+  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
--- a/imageio/imageio_util.c
+++ b/imageio/imageio_util.c
@ -89,6 +89,11 @@ int ImgIoUtilReadFile(const char* const file_name,
  }
  fseek(in, 0, SEEK_END);
  file_size = ftell(in);
+  if (file_size == (size_t)-1) {
+    fclose(in);
+    WFPRINTF(stderr, "error getting size of '%s'\n", (const W_CHAR*)file_name);
+    return 0;
+  }
  fseek(in, 0, SEEK_SET);
  // we allocate one extra byte for the \0 terminator
  file_data = (uint8_t*)WebPMalloc(file_size + 1);
--- a/imageio/jpegdec.c
+++ b/imageio/jpegdec.c
@ -206,8 +206,18 @@ struct my_error_mgr {

 static void my_error_exit(j_common_ptr dinfo) {
  struct my_error_mgr* myerr = (struct my_error_mgr*)dinfo->err;
+  // The following code is disabled in fuzzing mode because:
+  // - the logs can be flooded due to invalid JPEG files
+  // - msg_code is wrongfully seen as uninitialized by msan when the libjpeg
+  //   dependency is not built with sanitizers enabled
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  const int msg_code = myerr->pub.msg_code;
  fprintf(stderr, "libjpeg error: ");
  dinfo->err->output_message(dinfo);
+  if (msg_code == JERR_INPUT_EOF || msg_code == JERR_FILE_READ) {
+    fprintf(stderr, "`jpegtran -copy all` MAY be able to process this file.\n");
+  }
+#endif
  longjmp(myerr->setjmp_buffer, 1);
 }

--- a/man/cwebp.1
+++ b/man/cwebp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH CWEBP 1 "March 26, 2024"
+.TH CWEBP 1 "July 18, 2024"
 .SH NAME
 cwebp \- compress an image file to a WebP file
 .SH SYNOPSIS
@ -299,12 +299,12 @@ Note: each input format may not support all combinations.
 .B \-noasm
 Disable all assembly optimizations.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBcwebp\fP exits with the value of
+the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBcwebp\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 cwebp \-q 50 -lossless picture.png \-o picture_lossless.webp
@ -324,6 +324,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
 for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR dwebp (1),
 .BR gif2webp (1)
--- a/man/dwebp.1
+++ b/man/dwebp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH DWEBP 1 "November 17, 2021"
+.TH DWEBP 1 "July 18, 2024"
 .SH NAME
 dwebp \- decompress a WebP file to an image file
 .SH SYNOPSIS
@ -108,12 +108,12 @@ Print extra information (decoding time in particular).
 .B \-noasm
 Disable all assembly optimizations.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBdwebp\fP exits with the value of
+the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBdwebp\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 dwebp picture.webp \-o output.png
@ -133,6 +133,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
 for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR cwebp (1),
 .BR gif2webp (1),
--- a/man/gif2webp.1
+++ b/man/gif2webp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH GIF2WEBP 1 "November 17, 2021"
+.TH GIF2WEBP 1 "July 18, 2024"
 .SH NAME
 gif2webp \- Convert a GIF image to WebP
 .SH SYNOPSIS
@ -126,12 +126,12 @@ Print extra information.
 .B \-quiet
 Do not print anything.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBgif2webp\fP exits with the value
+of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBgif2webp\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 gif2webp picture.gif \-o picture.webp
@ -155,6 +155,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Urvang Joshi <urvang@google.com>, for the
 Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR cwebp (1),
 .BR dwebp (1),
--- a/man/img2webp.1
+++ b/man/img2webp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH IMG2WEBP 1 "March 17, 2023"
+.TH IMG2WEBP 1 "July 18, 2024"
 .SH NAME
 img2webp \- create animated WebP file from a sequence of input images.
 .SH SYNOPSIS
@ -89,17 +89,17 @@ Specify the compression method to use. This parameter controls the
 trade off between encoding speed and the compressed file size and quality.
 Possible values range from 0 to 6. Default value is 4.

+.SH EXIT STATUS
+If there were no problems during execution, \fBimg2webp\fP exits with the value
+of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBimg2webp\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.
+
 .SH EXAMPLE
 img2webp -loop 2 in0.png -lossy in1.jpg -d 80 in2.tiff -o out.webp
 .br

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
-
 .SH AUTHORS
 \fBimg2webp\fP is a part of libwebp and was written by the WebP team.
 .br
@ -109,6 +109,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
 for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR webpmux (1),
 .BR gif2webp (1)
--- a/man/vwebp.1
+++ b/man/vwebp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH VWEBP 1 "November 17, 2021"
+.TH VWEBP 1 "July 18, 2024"
 .SH NAME
 vwebp \- decompress a WebP file and display it in a window
 .SH SYNOPSIS
@ -72,12 +72,12 @@ Disable blending and disposal process, for debugging purposes.
 .B 'q' / 'Q' / ESC
 Quit.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBvwebp\fP exits with the value of
+the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBvwebp\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 vwebp picture.webp
@ -94,6 +94,13 @@ https://chromium.googlesource.com/webm/libwebp
 .PP
 This manual page was written for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR dwebp (1)
 .br
--- a/man/webpinfo.1
+++ b/man/webpinfo.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH WEBPINFO 1 "November 17, 2021"
+.TH WEBPINFO 1 "July 18, 2024"
 .SH NAME
 webpinfo \- print out the chunk level structure of WebP files
 along with basic integrity checks.
@ -47,12 +47,12 @@ Detailed usage instructions.
 Input files in WebP format. Input files must come last, following
 options (if any). There can be multiple input files.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBwebpinfo\fP exits with the value
+of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBwebpinfo\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 .br
@ -73,6 +73,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Hui Su <huisu@google.com>,
 for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR webpmux (1)
 .br
--- a/man/webpmux.1
+++ b/man/webpmux.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH WEBPMUX 1 "November 17, 2021"
+.TH WEBPMUX 1 "July 18, 2024"
 .SH NAME
 webpmux \- create animated WebP files from non\-animated WebP images, extract
 frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
@ -186,12 +186,12 @@ Output file in WebP format.
 .TP
 The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.

-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+.SH EXIT STATUS
+If there were no problems during execution, \fBwebpmux\fP exits with the value
+of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
+.PP
+If an error occurs, \fBwebpmux\fP exits with the value of the C constant
+\fBEXIT_FAILURE\fP. This is usually one.

 .SH EXAMPLES
 .P
@ -262,6 +262,13 @@ https://chromium.googlesource.com/webm/libwebp
 This manual page was written by Vikas Arora <vikaas.arora@gmail.com>,
 for the Debian project (and may be used by others).

+.SH REPORTING BUGS
+Please report all bugs to the issue tracker:
+https://issues.webmproject.org
+.br
+Patches welcome! See this page to get started:
+https://www.webmproject.org/code/contribute/submitting\-patches/
+
 .SH SEE ALSO
 .BR cwebp (1),
 .BR dwebp (1),
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@ -565,10 +565,11 @@ int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);

-  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
-                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
-                          v_ptr, v_stride, yuv_bit_depth, width, height,
-                          &scaled_matrix, transfer_type);
+  return DoSharpArgbToYuv(
+      (const uint8_t*)r_ptr, (const uint8_t*)g_ptr, (const uint8_t*)b_ptr,
+      rgb_step, rgb_stride, rgb_bit_depth, (uint8_t*)y_ptr, y_stride,
+      (uint8_t*)u_ptr, u_stride, (uint8_t*)v_ptr, v_stride, yuv_bit_depth,
+      width, height, &scaled_matrix, transfer_type);
 }

 //------------------------------------------------------------------------------
--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@ -16,7 +16,8 @@
 #include "src/utils/bit_reader_inl_utils.h"

 #if !defined(USE_GENERIC_TREE)
-#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
+#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
+    !defined(__wasm__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
 #define USE_GENERIC_TREE 1   // ALTERNATE_CODE
 #else
--- a/src/dec/vp8l_dec.c
+++ b/src/dec/vp8l_dec.c
@ -20,10 +20,9 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
-#include "src/dsp/yuv.h"
-#include "src/utils/endian_inl_utils.h"
 #include "src/utils/huffman_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/format_constants.h"

 #define NUM_ARGB_CACHE_ROWS          16

@ -381,7 +380,8 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,

  if (allow_recursion && VP8LReadBits(br, 1)) {
    // use meta Huffman codes.
-    const int huffman_precision = VP8LReadBits(br, 3) + 2;
+    const int huffman_precision =
+        MIN_HUFFMAN_BITS + VP8LReadBits(br, NUM_HUFFMAN_BITS);
    const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
    const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
    const int huffman_pixs = huffman_xsize * huffman_ysize;
@ -1351,7 +1351,8 @@ static int ReadTransform(int* const xsize, int const* ysize,
  switch (type) {
    case PREDICTOR_TRANSFORM:
    case CROSS_COLOR_TRANSFORM:
-      transform->bits_ = VP8LReadBits(br, 3) + 2;
+      transform->bits_ =
+          MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
      ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
                                               transform->bits_),
                             VP8LSubSampleSize(transform->ysize_,
@ -1416,7 +1417,9 @@ VP8LDecoder* VP8LNew(void) {
  return dec;
 }

-void VP8LClear(VP8LDecoder* const dec) {
+// Resets the decoder to its initial state, reclaiming memory.
+// Preserves the dec->status_ value.
+static void VP8LClear(VP8LDecoder* const dec) {
  int i;
  if (dec == NULL) return;
  ClearMetadata(&dec->hdr_);
--- a/src/dec/vp8li_dec.h
+++ b/src/dec/vp8li_dec.h
@ -121,10 +121,6 @@ WEBP_NODISCARD int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
 // this function. Returns false in case of error, with updated dec->status_.
 WEBP_NODISCARD int VP8LDecodeImage(VP8LDecoder* const dec);

-// Resets the decoder in its initial state, reclaiming memory.
-// Preserves the dec->status_ value.
-void VP8LClear(VP8LDecoder* const dec);
-
 // Clears and deallocate a lossless decoder instance.
 void VP8LDelete(VP8LDecoder* const dec);

--- a/src/dsp/dec_neon.c
+++ b/src/dsp/dec_neon.c
@ -1300,18 +1300,19 @@ static void DC4_NEON(uint8_t* dst) {    // DC
 static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
  const uint8x8_t T = vld1_u8(dst - BPS);  // top row 'A[0..3]'
-  const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL));  // A[c] - A[-1]
+  const uint16x8_t d = vsubl_u8(T, TL);  // A[c] - A[-1]
  int y;
  for (y = 0; y < size; y += 4) {
    // left edge
-    const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
-    const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
-    const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
-    const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
-    const int16x8_t r0 = vaddq_s16(L0, d);  // L[r] + A[c] - A[-1]
-    const int16x8_t r1 = vaddq_s16(L1, d);
-    const int16x8_t r2 = vaddq_s16(L2, d);
-    const int16x8_t r3 = vaddq_s16(L3, d);
+    const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
+    const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
+    const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
+    const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
+    // L[r] + A[c] - A[-1]
+    const int16x8_t r0 = vreinterpretq_s16_u16(vaddw_u8(d, L0));
+    const int16x8_t r1 = vreinterpretq_s16_u16(vaddw_u8(d, L1));
+    const int16x8_t r2 = vreinterpretq_s16_u16(vaddw_u8(d, L2));
+    const int16x8_t r3 = vreinterpretq_s16_u16(vaddw_u8(d, L3));
    // Saturate and store the result.
    const uint32x2_t r0_u32 = vreinterpret_u32_u8(vqmovun_s16(r0));
    const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
@ -1572,23 +1573,24 @@ static void TM16_NEON(uint8_t* dst) {
  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
  const uint8x16_t T = vld1q_u8(dst - BPS);  // top row 'A[0..15]'
  // A[c] - A[-1]
-  const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL));
-  const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL));
+  const uint16x8_t d_lo = vsubl_u8(vget_low_u8(T), TL);
+  const uint16x8_t d_hi = vsubl_u8(vget_high_u8(T), TL);
  int y;
  for (y = 0; y < 16; y += 4) {
    // left edge
-    const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
-    const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
-    const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
-    const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
-    const int16x8_t r0_lo = vaddq_s16(L0, d_lo);  // L[r] + A[c] - A[-1]
-    const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
-    const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
-    const int16x8_t r3_lo = vaddq_s16(L3, d_lo);
-    const int16x8_t r0_hi = vaddq_s16(L0, d_hi);
-    const int16x8_t r1_hi = vaddq_s16(L1, d_hi);
-    const int16x8_t r2_hi = vaddq_s16(L2, d_hi);
-    const int16x8_t r3_hi = vaddq_s16(L3, d_hi);
+    const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
+    const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
+    const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
+    const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
+    // L[r] + A[c] - A[-1]
+    const int16x8_t r0_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L0));
+    const int16x8_t r1_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L1));
+    const int16x8_t r2_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L2));
+    const int16x8_t r3_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L3));
+    const int16x8_t r0_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L0));
+    const int16x8_t r1_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L1));
+    const int16x8_t r2_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L2));
+    const int16x8_t r3_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L3));
    // Saturate and store the result.
    const uint8x16_t row0 = vcombine_u8(vqmovun_s16(r0_lo), vqmovun_s16(r0_hi));
    const uint8x16_t row1 = vcombine_u8(vqmovun_s16(r1_lo), vqmovun_s16(r1_hi));
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@ -197,7 +197,7 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {

 #if (USE_TRANSFORM_AC3 == 1)

-static void TransformAC3(const int16_t* in, uint8_t* dst) {
+static void TransformAC3_SSE2(const int16_t* in, uint8_t* dst) {
  const __m128i A = _mm_set1_epi16(in[0] + 4);
  const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
  const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@ -332,6 +332,7 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // luma 16x16 prediction (paragraph 12.3)

+#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
 static void Intra16Preds_C(uint8_t* dst,
                           const uint8_t* left, const uint8_t* top) {
  DCMode(I16DC16 + dst, left, top, 16, 16, 5);
@ -339,10 +340,13 @@ static void Intra16Preds_C(uint8_t* dst,
  HorizontalPred(I16HE16 + dst, left, 16);
  TrueMotion(I16TM16 + dst, left, top, 16);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64

 //------------------------------------------------------------------------------
 // luma 4x4 prediction

+#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
+
 #define DST(x, y) dst[(x) + (y) * BPS]
 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
@ -529,6 +533,8 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
  HU4(I4HU4 + dst, top);
 }

+#endif  // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
+
 //------------------------------------------------------------------------------
 // Metric

@ -644,6 +650,7 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
 // Quantization
 //

+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
 };
@ -675,7 +682,6 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
  return (last >= 0);
 }

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
                             const VP8Matrix* const mtx) {
  int nz;
@ -760,14 +766,17 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
  VP8EncQuantizeBlock = QuantizeBlock_C;
  VP8EncQuantize2Blocks = Quantize2Blocks_C;
+  VP8EncQuantizeBlockWHT = QuantizeBlock_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
+  VP8EncPredLuma4 = Intra4Preds_C;
+  VP8EncPredLuma16 = Intra16Preds_C;
 #endif

  VP8FTransform2 = FTransform2_C;
-  VP8EncPredLuma4 = Intra4Preds_C;
-  VP8EncPredLuma16 = Intra16Preds_C;
  VP8EncPredChroma8 = IntraChromaPreds_C;
  VP8Mean16x4 = Mean16x4_C;
-  VP8EncQuantizeBlockWHT = QuantizeBlock_C;
  VP8Copy4x4 = Copy4x4_C;
  VP8Copy16x8 = Copy16x8_C;

--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@ -911,6 +911,267 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],

 #endif   // !WORK_AROUND_GCC

+#if WEBP_AARCH64
+
+#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane)                              \
+  do {                                                                         \
+    uint8x16_t r;                                                              \
+    r = vqtbl2q_u8(qcombined, tbl);  /* byte lookup in caller's qcombined */   \
+    r = vreinterpretq_u8_u32(  /* r.u32[1] <- res.u32[lane] */                 \
+        vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane),          \
+                       vreinterpretq_u32_u8(r), 1));                           \
+    vst1q_u8(dst, r);  /* writes 16 bytes at dst */                            \
+  } while (0)
+
+#define RD4_VR4_LD4_VL4_NEON(dst, tbl)                                         \
+  do {                                                                         \
+    uint8x16_t r;                                                              \
+    r = vqtbl2q_u8(qcombined, tbl);  /* byte lookup in caller's qcombined */   \
+    vst1q_u8(dst, r);  /* writes 16 bytes at dst */                            \
+  } while (0)
+
+static void Intra4Preds_NEON(uint8_t* dst, const uint8_t* top) {
+  // 0   1   2   3   4   5   6   7   8   9  10  11  12  13
+  //     L   K   J   I   X   A   B   C   D   E   F   G   H
+  //    -5  -4  -3  -2  -1   0   1   2   3   4   5   6   7
+  static const uint8_t kLookupTbl1[64] = {  // a, full_b, then HD4 and HU4
+    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 12, 12,
+    3,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  0,  0,  0,  0,
+    4, 20, 21, 22,  3, 18,  2, 17,  3, 19,  4, 20,  2, 17,  1, 16,
+    2, 18,  3, 19,  1, 16, 31, 31,  1, 17,  2, 18, 31, 31, 31, 31
+  };
+
+  static const uint8_t kLookupTbl2[64] = {  // used for RD4, VR4, LD4, VL4
+    20, 21, 22, 23,  5,  6,  7,  8, 22, 23, 24, 25,  6,  7,  8,  9,
+    19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
+    18, 19, 20, 21, 19,  5,  6,  7, 24, 25, 26, 27,  7,  8,  9, 26,
+    17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
+  };
+
+  static const uint8_t kLookupTbl3[64] = {  // used for DC4, VE4, HE4, TM4
+    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 19, 19, 19, 19,
+    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 18, 18, 18, 18,
+    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 17, 17, 17, 17,
+    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 16, 16, 16, 16
+  };
+
+  const uint8x16x4_t lookup_avgs1 = vld1q_u8_x4(kLookupTbl1);
+  const uint8x16x4_t lookup_avgs2 = vld1q_u8_x4(kLookupTbl2);
+  const uint8x16x4_t lookup_avgs3 = vld1q_u8_x4(kLookupTbl3);
+
+  const uint8x16_t preload = vld1q_u8(top - 5);  // top[-5..10]
+  uint8x16x2_t qcombined;
+  uint8x16_t result0, result1;
+
+  uint8x16_t a = vqtbl1q_u8(preload, lookup_avgs1.val[0]);
+  uint8x16_t b = preload;
+  uint8x16_t c = vextq_u8(a, a, 2);  // c[i] = a[(i + 2) % 16]
+
+  uint8x16_t avg3_all = vrhaddq_u8(vhaddq_u8(a, c), b);  // (a+2b+c+2)>>2
+  uint8x16_t avg2_all = vrhaddq_u8(a, b);  // (a + b + 1) >> 1
+
+  uint8x8_t preload_x8, sub_a, sub_c;
+  uint8_t result_u8;
+  uint8x8_t res_lo, res_hi;
+  uint8x16_t full_b;
+  uint16x8_t sub, sum_lo, sum_hi;
+
+  preload_x8 = vget_low_u8(c);
+  preload_x8 = vset_lane_u8(vgetq_lane_u8(preload, 0), preload_x8, 3);
+
+  result_u8 = (vaddlv_u8(preload_x8) + 4) >> 3;  // rounded mean of 8 lanes
+
+  avg3_all = vsetq_lane_u8(vgetq_lane_u8(preload, 0), avg3_all, 15);
+  avg3_all = vsetq_lane_u8(result_u8, avg3_all, 14);  // lane 14 <- mean
+
+  qcombined.val[0] = avg2_all;  // table indices 0..15
+  qcombined.val[1] = avg3_all;  // table indices 16..31
+
+  sub_a = vdup_laneq_u8(preload, 4);  // preload[4] in every lane
+
+  // preload = {a,b,c,d,...} => full_b = {d,d,d,d,c,c,c,c,b,b,b,b,a,a,a,a}
+  full_b = vqtbl1q_u8(preload, lookup_avgs1.val[1]);
+  // sub_c = {preload[5..8], preload[5..8]}: the same four bytes, twice.
+  sub_c = vreinterpret_u8_u32(vdup_n_u32(
+      vgetq_lane_u32(vreinterpretq_u32_u8(vextq_u8(preload, preload, 5)), 0)));
+
+  sub = vsubl_u8(sub_c, sub_a);  // 16-bit lanes; wraps when sub_c < sub_a
+  sum_lo = vaddw_u8(sub, vget_low_u8(full_b));
+  res_lo = vqmovun_s16(vreinterpretq_s16_u16(sum_lo));  // clamp to 0..255
+
+  sum_hi = vaddw_u8(sub, vget_high_u8(full_b));
+  res_hi = vqmovun_s16(vreinterpretq_s16_u16(sum_hi));
+
+  // DC4, VE4, HE4, TM4
+  DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 0, lookup_avgs3.val[0], res_lo, 0);
+  DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 1, lookup_avgs3.val[1], res_lo, 1);
+  DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 2, lookup_avgs3.val[2], res_hi, 0);
+  DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 3, lookup_avgs3.val[3], res_hi, 1);
+
+  // RD4, VR4, LD4, VL4
+  RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 0, lookup_avgs2.val[0]);
+  RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 1, lookup_avgs2.val[1]);
+  RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 2, lookup_avgs2.val[2]);
+  RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 3, lookup_avgs2.val[3]);
+
+  // HD4, HU4
+  result0 = vqtbl2q_u8(qcombined, lookup_avgs1.val[2]);
+  result1 = vqtbl2q_u8(qcombined, lookup_avgs1.val[3]);
+
+  vst1_u8(dst + I4HD4 + BPS * 0, vget_low_u8(result0));  // 8 bytes per row
+  vst1_u8(dst + I4HD4 + BPS * 1, vget_high_u8(result0));
+  vst1_u8(dst + I4HD4 + BPS * 2, vget_low_u8(result1));
+  vst1_u8(dst + I4HD4 + BPS * 3, vget_high_u8(result1));
+}
+
+static WEBP_INLINE void Fill_NEON(uint8_t* dst, const uint8_t value) {
+  uint8x16_t a = vdupq_n_u8(value);  // 'value' in all 16 lanes
+  int i;
+  for (i = 0; i < 16; i++) {
+    vst1q_u8(dst + BPS * i, a);  // row i: 16 bytes, rows are BPS apart
+  }
+}
+
+static WEBP_INLINE void Fill16_NEON(uint8_t* dst, const uint8_t* src) {
+  uint8x16_t a = vld1q_u8(src);  // src[0..15], loaded once
+  int i;
+  for (i = 0; i < 16; i++) {
+    vst1q_u8(dst + BPS * i, a);  // same 16 bytes written to each of 16 rows
+  }
+}
+
+static WEBP_INLINE void HorizontalPred16_NEON(uint8_t* dst,
+                                              const uint8_t* left) {
+  uint8x16_t a;
+
+  if (left == NULL) {  // no left samples: constant 129 fill
+    Fill_NEON(dst, 129);
+    return;
+  }
+
+  a = vld1q_u8(left + 0);  // left[0..15]
+  vst1q_u8(dst + BPS * 0, vdupq_laneq_u8(a, 0));  // row r <- 16 x left[r]
+  vst1q_u8(dst + BPS * 1, vdupq_laneq_u8(a, 1));
+  vst1q_u8(dst + BPS * 2, vdupq_laneq_u8(a, 2));
+  vst1q_u8(dst + BPS * 3, vdupq_laneq_u8(a, 3));
+  vst1q_u8(dst + BPS * 4, vdupq_laneq_u8(a, 4));
+  vst1q_u8(dst + BPS * 5, vdupq_laneq_u8(a, 5));
+  vst1q_u8(dst + BPS * 6, vdupq_laneq_u8(a, 6));
+  vst1q_u8(dst + BPS * 7, vdupq_laneq_u8(a, 7));
+  vst1q_u8(dst + BPS * 8, vdupq_laneq_u8(a, 8));
+  vst1q_u8(dst + BPS * 9, vdupq_laneq_u8(a, 9));
+  vst1q_u8(dst + BPS * 10, vdupq_laneq_u8(a, 10));
+  vst1q_u8(dst + BPS * 11, vdupq_laneq_u8(a, 11));
+  vst1q_u8(dst + BPS * 12, vdupq_laneq_u8(a, 12));
+  vst1q_u8(dst + BPS * 13, vdupq_laneq_u8(a, 13));
+  vst1q_u8(dst + BPS * 14, vdupq_laneq_u8(a, 14));
+  vst1q_u8(dst + BPS * 15, vdupq_laneq_u8(a, 15));
+}
+
+static WEBP_INLINE void VerticalPred16_NEON(uint8_t* dst, const uint8_t* top) {
+  if (top != NULL) {
+    Fill16_NEON(dst, top);  // every row is a copy of top[0..15]
+  } else {
+    Fill_NEON(dst, 127);  // no top samples: constant 127 fill
+  }
+}
+
+static WEBP_INLINE void DCMode_NEON(uint8_t* dst, const uint8_t* left,
+                                    const uint8_t* top) {
+  uint8_t s;  // the single value the whole block is filled with
+
+  if (top != NULL) {
+    uint16_t dc;
+    dc = vaddlvq_u8(vld1q_u8(top));  // sum of top[0..15]
+    if (left != NULL) {
+      // top and left present.
+      dc += vaddlvq_u8(vld1q_u8(left));  // plus sum of left[0..15]
+      s = vqrshrnh_n_u16(dc, 5);  // (dc + 16) >> 5, saturated to 8 bits
+    } else {
+      // top but no left.
+      s = vqrshrnh_n_u16(dc, 4);  // (dc + 8) >> 4, saturated to 8 bits
+    }
+  } else {
+    if (left != NULL) {
+      uint16_t dc;
+      // left but no top.
+      dc = vaddlvq_u8(vld1q_u8(left));  // sum of left[0..15]
+      s = vqrshrnh_n_u16(dc, 4);
+    } else {
+      // No top, no left, nothing.
+      s = 0x80;
+    }
+  }
+  Fill_NEON(dst, s);  // 16 rows of 16 bytes, all equal to 's'
+}
+
+static WEBP_INLINE void TrueMotionHelper_NEON(uint8_t* dst,
+                                              const uint8x8_t outer,
+                                              const uint8x8x2_t inner,
+                                              const uint16x8_t a, int i,
+                                              const int n) {
+  uint8x8_t d1, d2;
+  uint16x8_t r1, r2;
+
+  r1 = vaddl_u8(outer, inner.val[0]);  // widened to 16 bits: outer + inner
+  r1 = vqsubq_u16(r1, a);  // unsigned saturating subtract: floors at 0
+  d1 = vqmovun_s16(vreinterpretq_s16_u16(r1));  // narrows, capping at 255
+  r2 = vaddl_u8(outer, inner.val[1]);  // same steps for the second half
+  r2 = vqsubq_u16(r2, a);
+  d2 = vqmovun_s16(vreinterpretq_s16_u16(r2));
+  vst1_u8(dst + BPS * (i * 4 + n), d1);  // row 4 * i + n, bytes 0..7
+  vst1_u8(dst + BPS * (i * 4 + n) + 8, d2);  // same row, bytes 8..15
+}
+
+static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
+                                        const uint8_t* top) {
+  int i;
+  uint16x8_t a;
+  uint8x8x2_t inner;
+
+  if (left == NULL) {
+    // True motion without left samples (hence: with default 129 value) is
+    // equivalent to VE prediction where you just copy the top samples.
+    // Note that if top samples are not available, the default value is then
+    // 129, and not 127 as in the VerticalPred case.
+    if (top != NULL) {
+      VerticalPred16_NEON(dst, top);
+    } else {
+      Fill_NEON(dst, 129);
+    }
+    return;
+  }
+
+  // left is not NULL.
+  if (top == NULL) {
+    HorizontalPred16_NEON(dst, left);
+    return;
+  }
+
+  // Neither left nor top are NULL.
+  a = vdupq_n_u16(left[-1]);  // left[-1] broadcast as 16-bit lanes
+  inner = vld1_u8_x2(top);  // val[0] = top[0..7], val[1] = top[8..15]
+
+  for (i = 0; i < 4; i++) {  // four rows per iteration, 16 rows in total
+    const uint8x8x4_t outer = vld4_dup_u8(&left[i * 4]);  // left[4i..4i+3]
+
+    TrueMotionHelper_NEON(dst, outer.val[0], inner, a, i, 0);
+    TrueMotionHelper_NEON(dst, outer.val[1], inner, a, i, 1);
+    TrueMotionHelper_NEON(dst, outer.val[2], inner, a, i, 2);
+    TrueMotionHelper_NEON(dst, outer.val[3], inner, a, i, 3);
+  }
+}
+
+static void Intra16Preds_NEON(uint8_t* dst, const uint8_t* left,
+                              const uint8_t* top) {
+  DCMode_NEON(I16DC16 + dst, left, top);  // each call writes at its own offset
+  VerticalPred16_NEON(I16VE16 + dst, top);
+  HorizontalPred16_NEON(I16HE16 + dst, left);
+  TrueMotion_NEON(I16TM16 + dst, left, top);
+}
+
+#endif // WEBP_AARCH64
+
 //------------------------------------------------------------------------------
 // Entry point

@ -931,9 +1192,17 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
  VP8SSE8x8 = SSE8x8_NEON;
  VP8SSE4x4 = SSE4x4_NEON;

+#if WEBP_AARCH64
+#if BPS == 32
+  VP8EncPredLuma4 = Intra4Preds_NEON;
+#endif
+  VP8EncPredLuma16 = Intra16Preds_NEON;
+#endif
+
 #if !defined(WORK_AROUND_GCC)
  VP8EncQuantizeBlock = QuantizeBlock_NEON;
  VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
+  VP8EncQuantizeBlockWHT = QuantizeBlock_NEON;
 #endif
 }

--- a/src/dsp/filters.c
+++ b/src/dsp/filters.c
@ -26,19 +26,13 @@
    assert(width > 0);                                                         \
    assert(height > 0);                                                        \
    assert(stride >= width);                                                   \
-    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
-    (void)height;  /* Silence unused warning. */                               \
  } while (0)

 #if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
-                                      uint8_t* dst, int length, int inverse) {
+                                      uint8_t* dst, int length) {
  int i;
-  if (inverse) {
-    for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] + pred[i]);
-  } else {
-    for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
-  }
+  for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
 }

 //------------------------------------------------------------------------------
@ -46,32 +40,23 @@ static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,

 static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
                                             int width, int height, int stride,
-                                             int row, int num_rows,
-                                             int inverse, uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                             uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = inverse ? out : in;

-  if (row == 0) {
-    // Leftmost pixel is the same as input for topmost scanline.
-    out[0] = in[0];
-    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
-    row = 1;
-    preds += stride;
-    in += stride;
-    out += stride;
-  }
+  // Leftmost pixel is the same as input for topmost scanline.
+  out[0] = in[0];
+  PredictLine_C(in + 1, preds, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    // Leftmost pixel is predicted from above.
-    PredictLine_C(in, preds - stride, out, 1, inverse);
-    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
-    ++row;
+    PredictLine_C(in, preds - stride, out, 1);
+    PredictLine_C(in + 1, preds, out + 1, width - 1);
    preds += stride;
    in += stride;
    out += stride;
@ -83,33 +68,21 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,

 static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
                                           int width, int height, int stride,
-                                           int row, int num_rows,
-                                           int inverse, uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                           uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = inverse ? out : in;

-  if (row == 0) {
-    // Very first top-left pixel is copied.
-    out[0] = in[0];
-    // Rest of top scan-line is left-predicted.
-    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
-    row = 1;
-    in += stride;
-    out += stride;
-  } else {
-    // We are starting from in-between. Make sure 'preds' points to prev row.
-    preds -= stride;
-  }
+  // Very first top-left pixel is copied.
+  out[0] = in[0];
+  // Rest of top scan-line is left-predicted.
+  PredictLine_C(in + 1, preds, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
-    PredictLine_C(in, preds, out, width, inverse);
-    ++row;
+  for (row = 1; row < height; ++row) {
+    PredictLine_C(in, preds, out, width);
    preds += stride;
    in += stride;
    out += stride;
@ -128,38 +101,29 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
 #if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
                                           int width, int height, int stride,
-                                           int row, int num_rows,
-                                           int inverse, uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                           uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = inverse ? out : in;

  // left prediction for top scan-line
-  if (row == 0) {
-    out[0] = in[0];
-    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
-    row = 1;
-    preds += stride;
-    in += stride;
-    out += stride;
-  }
+  out[0] = in[0];
+  PredictLine_C(in + 1, preds, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    int w;
    // leftmost pixel: predict from above.
-    PredictLine_C(in, preds - stride, out, 1, inverse);
+    PredictLine_C(in, preds - stride, out, 1);
    for (w = 1; w < width; ++w) {
      const int pred = GradientPredictor_C(preds[w - 1],
                                           preds[w - stride],
                                           preds[w - stride - 1]);
-      out[w] = (uint8_t)(in[w] + (inverse ? pred : -pred));
+      out[w] = (uint8_t)(in[w] - pred);
    }
-    ++row;
    preds += stride;
    in += stride;
    out += stride;
@ -174,18 +138,17 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
 #if !WEBP_NEON_OMIT_C_CODE
 static void HorizontalFilter_C(const uint8_t* data, int width, int height,
                               int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
-                       filtered_data);
+  DoHorizontalFilter_C(data, width, height, stride, filtered_data);
 }

 static void VerticalFilter_C(const uint8_t* data, int width, int height,
                             int stride, uint8_t* filtered_data) {
-  DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
+  DoVerticalFilter_C(data, width, height, stride, filtered_data);
 }

 static void GradientFilter_C(const uint8_t* data, int width, int height,
                             int stride, uint8_t* filtered_data) {
-  DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
+  DoGradientFilter_C(data, width, height, stride, filtered_data);
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE

--- a/src/dsp/filters_mips_dsp_r2.c
+++ b/src/dsp/filters_mips_dsp_r2.c
@ -31,8 +31,6 @@
    assert(width > 0);                                                         \
    assert(height > 0);                                                        \
    assert(stride >= width);                                                   \
-    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
-    (void)height;  /* Silence unused warning. */                               \
  } while (0)

 #define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do {                        \
@ -184,10 +182,9 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
 // Horizontal filter.

 #define FILTER_LINE_BY_LINE do {                                               \
-    while (row < last_row) {                                                   \
+    for (row = 1; row < height; ++row) {                                       \
      PREDICT_LINE_ONE_PASS(in, preds - stride, out);                          \
      DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0);                          \
-      ++row;                                                                   \
      preds += stride;                                                         \
      in += stride;                                                            \
      out += stride;                                                           \
@ -196,26 +193,17 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,

 static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
                                                     int width, int height,
-                                                     int stride,
-                                                     int row, int num_rows,
-                                                     uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                                     int stride, uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = in;

-  if (row == 0) {
-    // Leftmost pixel is the same as input for topmost scanline.
-    out[0] = in[0];
-    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
-    row = 1;
-    preds += stride;
-    in += stride;
-    out += stride;
-  }
+  // Leftmost pixel is the same as input for topmost scanline.
+  out[0] = in[0];
+  PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;

  // Filter line-by-line.
  FILTER_LINE_BY_LINE;
@ -225,17 +213,15 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
 static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
                                       int width, int height,
                                       int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
-                               filtered_data);
+  DoHorizontalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
 // Vertical filter.

 #define FILTER_LINE_BY_LINE do {                                               \
-    while (row < last_row) {                                                   \
+    for (row = 1; row < height; ++row) {                                       \
      DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0);                      \
-      ++row;                                                                   \
      preds += stride;                                                         \
      in += stride;                                                            \
      out += stride;                                                           \
@ -244,29 +230,17 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* data,

 static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
                                                   int width, int height,
-                                                   int stride,
-                                                   int row, int num_rows,
-                                                   uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                                   int stride, uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = in;

-  if (row == 0) {
-    // Very first top-left pixel is copied.
-    out[0] = in[0];
-    // Rest of top scan-line is left-predicted.
-    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  } else {
-    // We are starting from in-between. Make sure 'preds' points to prev row.
-    preds -= stride;
-  }
+  // Very first top-left pixel is copied.
+  out[0] = in[0];
+  // Rest of top scan-line is left-predicted.
+  PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
  FILTER_LINE_BY_LINE;
@ -275,8 +249,7 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,

 static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
                                     int stride, uint8_t* filtered_data) {
-  DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
-                             filtered_data);
+  DoVerticalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
@ -297,7 +270,7 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
 }

 #define FILTER_LINE_BY_LINE(PREDS, OPERATION) do {                             \
-    while (row < last_row) {                                                   \
+    for (row = 1; row < height; ++row) {                                       \
      int w;                                                                   \
      PREDICT_LINE_ONE_PASS(in, PREDS - stride, out);                          \
      for (w = 1; w < width; ++w) {                                            \
@ -306,7 +279,6 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
                                                     PREDS[w - stride - 1]);   \
        out[w] = in[w] OPERATION pred;                                         \
      }                                                                        \
-      ++row;                                                                   \
      in += stride;                                                            \
      out += stride;                                                           \
    }                                                                          \
@ -314,24 +286,17 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {

 static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
                                       int width, int height, int stride,
-                                       int row, int num_rows, uint8_t* out) {
-  const uint8_t* preds;
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                       uint8_t* out) {
+  const uint8_t* preds = in;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;
-  preds = in;

  // left prediction for top scan-line
-  if (row == 0) {
-    out[0] = in[0];
-    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
-    row = 1;
-    preds += stride;
-    in += stride;
-    out += stride;
-  }
+  out[0] = in[0];
+  PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;

  // Filter line-by-line.
  FILTER_LINE_BY_LINE(in, -);
@ -340,8 +305,7 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* in,

 static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
                                     int stride, uint8_t* filtered_data) {
-  DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
-                             filtered_data);
+  DoGradientFilter_MIPSdspR2(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
--- a/src/dsp/filters_neon.c
+++ b/src/dsp/filters_neon.c
@ -28,8 +28,6 @@
    assert(width > 0);                                                         \
    assert(height > 0);                                                        \
    assert(stride >= width);                                                   \
-    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
-    (void)height;  /* Silence unused warning. */                               \
  } while (0)

 // load eight u8 and widen to s16
@ -68,30 +66,21 @@ static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {

 static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
                                                int width, int height,
-                                                int stride,
-                                                int row, int num_rows,
-                                                uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                                int stride, uint8_t* out) {
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

-  if (row == 0) {
-    // Leftmost pixel is the same as input for topmost scanline.
-    out[0] = in[0];
-    PredictLineLeft_NEON(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  // Leftmost pixel is the same as input for topmost scanline.
+  out[0] = in[0];
+  PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    // Leftmost pixel is predicted from above.
    out[0] = in[0] - in[-stride];
    PredictLineLeft_NEON(in + 1, out + 1, width - 1);
-    ++row;
    in += stride;
    out += stride;
  }
@ -99,8 +88,7 @@ static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,

 static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
                                  int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter_NEON(data, width, height, stride, 0, height,
-                          filtered_data);
+  DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
@ -108,28 +96,20 @@ static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,

 static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
                                              int width, int height, int stride,
-                                              int row, int num_rows,
                                              uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

-  if (row == 0) {
-    // Very first top-left pixel is copied.
-    out[0] = in[0];
-    // Rest of top scan-line is left-predicted.
-    PredictLineLeft_NEON(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  // Very first top-left pixel is copied.
+  out[0] = in[0];
+  // Rest of top scan-line is left-predicted.
+  PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    PredictLine_NEON(in, in - stride, out, width);
-    ++row;
    in += stride;
    out += stride;
  }
@ -137,8 +117,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,

 static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
-  DoVerticalFilter_NEON(data, width, height, stride, 0, height,
-                        filtered_data);
+  DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
@ -168,30 +147,21 @@ static void GradientPredictDirect_NEON(const uint8_t* const row,
 }

 static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
-                                              int width, int height,
-                                              int stride,
-                                              int row, int num_rows,
+                                              int width, int height, int stride,
                                              uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

  // left prediction for top scan-line
-  if (row == 0) {
-    out[0] = in[0];
-    PredictLineLeft_NEON(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  out[0] = in[0];
+  PredictLineLeft_NEON(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    out[0] = in[0] - in[-stride];
    GradientPredictDirect_NEON(in + 1, in + 1 - stride, out + 1, width - 1);
-    ++row;
    in += stride;
    out += stride;
  }
@ -199,8 +169,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,

 static void GradientFilter_NEON(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
-  DoGradientFilter_NEON(data, width, height, stride, 0, height,
-                        filtered_data);
+  DoGradientFilter_NEON(data, width, height, stride, filtered_data);
 }

 #undef DCHECK
--- a/src/dsp/filters_sse2.c
+++ b/src/dsp/filters_sse2.c
@ -30,8 +30,6 @@
    assert(width > 0);                                                         \
    assert(height > 0);                                                        \
    assert(stride >= width);                                                   \
-    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
-    (void)height;  /* Silence unused warning. */                               \
  } while (0)

 static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
@ -75,30 +73,21 @@ static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {

 static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
                                                int width, int height,
-                                                int stride,
-                                                int row, int num_rows,
-                                                uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+                                                int stride, uint8_t* out) {
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

-  if (row == 0) {
-    // Leftmost pixel is the same as input for topmost scanline.
-    out[0] = in[0];
-    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  // Leftmost pixel is the same as input for topmost scanline.
+  out[0] = in[0];
+  PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    // Leftmost pixel is predicted from above.
    out[0] = in[0] - in[-stride];
    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
-    ++row;
    in += stride;
    out += stride;
  }
@ -109,28 +98,20 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,

 static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
                                              int width, int height, int stride,
-                                              int row, int num_rows,
                                              uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

-  if (row == 0) {
-    // Very first top-left pixel is copied.
-    out[0] = in[0];
-    // Rest of top scan-line is left-predicted.
-    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  // Very first top-left pixel is copied.
+  out[0] = in[0];
+  // Rest of top scan-line is left-predicted.
+  PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    PredictLineTop_SSE2(in, in - stride, out, width);
-    ++row;
    in += stride;
    out += stride;
  }
@ -172,28 +153,20 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,

 static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
                                              int width, int height, int stride,
-                                              int row, int num_rows,
                                              uint8_t* out) {
-  const size_t start_offset = row * stride;
-  const int last_row = row + num_rows;
+  int row;
  DCHECK(in, out);
-  in += start_offset;
-  out += start_offset;

  // left prediction for top scan-line
-  if (row == 0) {
-    out[0] = in[0];
-    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
-    row = 1;
-    in += stride;
-    out += stride;
-  }
+  out[0] = in[0];
+  PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
+  in += stride;
+  out += stride;

  // Filter line-by-line.
-  while (row < last_row) {
+  for (row = 1; row < height; ++row) {
    out[0] = (uint8_t)(in[0] - in[-stride]);
    GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
-    ++row;
    in += stride;
    out += stride;
  }
@ -205,18 +178,17 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,

 static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
                                  int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
-                          filtered_data);
+  DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
 }

 static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
-  DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
+  DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
 }

 static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
-  DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
+  DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
 }

 //------------------------------------------------------------------------------
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@ -107,14 +107,14 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
 //------------------------------------------------------------------------------
 // Predictors

-uint32_t VP8LPredictor0_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+static uint32_t VP8LPredictor0_C(const uint32_t* const left,
+                                 const uint32_t* const top) {
  (void)top;
  (void)left;
  return ARGB_BLACK;
 }
-uint32_t VP8LPredictor1_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+static uint32_t VP8LPredictor1_C(const uint32_t* const left,
+                                 const uint32_t* const top) {
  (void)top;
  return *left;
 }
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@ -32,10 +32,6 @@ typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
                                      const uint32_t* const top);
 extern VP8LPredictorFunc VP8LPredictors[16];

-uint32_t VP8LPredictor0_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor1_C(const uint32_t* const left,
-                          const uint32_t* const top);
 uint32_t VP8LPredictor2_C(const uint32_t* const left,
                          const uint32_t* const top);
 uint32_t VP8LPredictor3_C(const uint32_t* const left,
@ -155,13 +151,13 @@ extern VP8LTransformColorFunc VP8LTransformColor;
 typedef void (*VP8LCollectColorBlueTransformsFunc)(
    const uint32_t* argb, int stride,
    int tile_width, int tile_height,
-    int green_to_blue, int red_to_blue, int histo[]);
+    int green_to_blue, int red_to_blue, uint32_t histo[]);
 extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;

 typedef void (*VP8LCollectColorRedTransformsFunc)(
    const uint32_t* argb, int stride,
    int tile_width, int tile_height,
-    int green_to_red, int histo[]);
+    int green_to_red, uint32_t histo[]);
 extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;

 // Expose some C-only fallback functions
@ -170,11 +166,11 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m,
 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
 void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
                                     int tile_width, int tile_height,
-                                     int green_to_red, int histo[]);
+                                     int green_to_red, uint32_t histo[]);
 void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
                                      int tile_width, int tile_height,
                                      int green_to_blue, int red_to_blue,
-                                      int histo[]);
+                                      uint32_t histo[]);

 extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
 extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
@ -185,8 +181,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
 typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
 typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
                                         int length);
-typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
-                                                const int Y[256]);
+typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
+                                                   const uint32_t Y[256]);

 extern VP8LCostFunc VP8LExtraCost;
 extern VP8LCostCombinedFunc VP8LExtraCostCombined;
@ -198,7 +194,7 @@ typedef struct {        // small struct to hold counters
 } VP8LStreaks;

 typedef struct {            // small struct to hold bit entropy results
-  float entropy;            // entropy
+  uint64_t entropy;         // entropy
  uint32_t sum;             // sum of the population
  int nonzeros;             // number of non-zero elements in the population
  uint32_t max_val;         // maximum value in the population
--- a/src/dsp/lossless_common.h
+++ b/src/dsp/lossless_common.h
@ -73,23 +73,44 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
 // Keeping a high threshold for now.
 #define APPROX_LOG_WITH_CORRECTION_MAX  65536
 #define APPROX_LOG_MAX                   4096
+// VP8LFastLog2 and VP8LFastSLog2 are used on elements from image histograms.
+// The histogram values cannot exceed the maximum number of pixels, which
+// is (1 << 14) * (1 << 14). Therefore S * log(S) < (1 << 33).
+// No more than 32 bits of precision should be chosen.
+// To match the original float implementation, 23 bits of precision are used.
+#define LOG_2_PRECISION_BITS 23
 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+// LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS)
+#define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500
+#define LOG_2_RECIPROCAL_FIXED ((uint64_t)12102203)
 #define LOG_LOOKUP_IDX_MAX 256
-extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
-extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
-typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
+extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX];
+extern const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX];
+typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v);
+typedef uint64_t (*VP8LFastSLog2SlowFunc)(uint32_t v);

 extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
-extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;

-static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
+static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) {
  return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
 }
 // Fast calculation of v * log2(v) for integer input.
-static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
+static WEBP_INLINE uint64_t VP8LFastSLog2(uint32_t v) {
  return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
 }

+static WEBP_INLINE uint64_t RightShiftRound(uint64_t v, uint32_t shift) {
+  return (v + (1ull << shift >> 1)) >> shift;
+}
+
+static WEBP_INLINE int64_t DivRound(int64_t a, int64_t b) {
+  return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b);
+}
+
+#define WEBP_INT64_MAX ((int64_t)((1ull << 63) - 1))
+#define WEBP_UINT64_MAX (~0ull)
+
 // -----------------------------------------------------------------------------
 // PrefixEncode()

--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@ -24,203 +24,123 @@
 #include "src/dsp/lossless_common.h"
 #include "src/dsp/yuv.h"

-// lookup table for small values of log2(int)
-const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
-  0.0000000000000000f, 0.0000000000000000f,
-  1.0000000000000000f, 1.5849625007211560f,
-  2.0000000000000000f, 2.3219280948873621f,
-  2.5849625007211560f, 2.8073549220576041f,
-  3.0000000000000000f, 3.1699250014423121f,
-  3.3219280948873621f, 3.4594316186372973f,
-  3.5849625007211560f, 3.7004397181410921f,
-  3.8073549220576041f, 3.9068905956085187f,
-  4.0000000000000000f, 4.0874628412503390f,
-  4.1699250014423121f, 4.2479275134435852f,
-  4.3219280948873626f, 4.3923174227787606f,
-  4.4594316186372973f, 4.5235619560570130f,
-  4.5849625007211560f, 4.6438561897747243f,
-  4.7004397181410917f, 4.7548875021634682f,
-  4.8073549220576037f, 4.8579809951275718f,
-  4.9068905956085187f, 4.9541963103868749f,
-  5.0000000000000000f, 5.0443941193584533f,
-  5.0874628412503390f, 5.1292830169449663f,
-  5.1699250014423121f, 5.2094533656289501f,
-  5.2479275134435852f, 5.2854022188622487f,
-  5.3219280948873626f, 5.3575520046180837f,
-  5.3923174227787606f, 5.4262647547020979f,
-  5.4594316186372973f, 5.4918530963296747f,
-  5.5235619560570130f, 5.5545888516776376f,
-  5.5849625007211560f, 5.6147098441152083f,
-  5.6438561897747243f, 5.6724253419714951f,
-  5.7004397181410917f, 5.7279204545631987f,
-  5.7548875021634682f, 5.7813597135246599f,
-  5.8073549220576037f, 5.8328900141647412f,
-  5.8579809951275718f, 5.8826430493618415f,
-  5.9068905956085187f, 5.9307373375628866f,
-  5.9541963103868749f, 5.9772799234999167f,
-  6.0000000000000000f, 6.0223678130284543f,
-  6.0443941193584533f, 6.0660891904577720f,
-  6.0874628412503390f, 6.1085244567781691f,
-  6.1292830169449663f, 6.1497471195046822f,
-  6.1699250014423121f, 6.1898245588800175f,
-  6.2094533656289501f, 6.2288186904958804f,
-  6.2479275134435852f, 6.2667865406949010f,
-  6.2854022188622487f, 6.3037807481771030f,
-  6.3219280948873626f, 6.3398500028846243f,
-  6.3575520046180837f, 6.3750394313469245f,
-  6.3923174227787606f, 6.4093909361377017f,
-  6.4262647547020979f, 6.4429434958487279f,
-  6.4594316186372973f, 6.4757334309663976f,
-  6.4918530963296747f, 6.5077946401986963f,
-  6.5235619560570130f, 6.5391588111080309f,
-  6.5545888516776376f, 6.5698556083309478f,
-  6.5849625007211560f, 6.5999128421871278f,
-  6.6147098441152083f, 6.6293566200796094f,
-  6.6438561897747243f, 6.6582114827517946f,
-  6.6724253419714951f, 6.6865005271832185f,
-  6.7004397181410917f, 6.7142455176661224f,
-  6.7279204545631987f, 6.7414669864011464f,
-  6.7548875021634682f, 6.7681843247769259f,
-  6.7813597135246599f, 6.7944158663501061f,
-  6.8073549220576037f, 6.8201789624151878f,
-  6.8328900141647412f, 6.8454900509443747f,
-  6.8579809951275718f, 6.8703647195834047f,
-  6.8826430493618415f, 6.8948177633079437f,
-  6.9068905956085187f, 6.9188632372745946f,
-  6.9307373375628866f, 6.9425145053392398f,
-  6.9541963103868749f, 6.9657842846620869f,
-  6.9772799234999167f, 6.9886846867721654f,
-  7.0000000000000000f, 7.0112272554232539f,
-  7.0223678130284543f, 7.0334230015374501f,
-  7.0443941193584533f, 7.0552824355011898f,
-  7.0660891904577720f, 7.0768155970508308f,
-  7.0874628412503390f, 7.0980320829605263f,
-  7.1085244567781691f, 7.1189410727235076f,
-  7.1292830169449663f, 7.1395513523987936f,
-  7.1497471195046822f, 7.1598713367783890f,
-  7.1699250014423121f, 7.1799090900149344f,
-  7.1898245588800175f, 7.1996723448363644f,
-  7.2094533656289501f, 7.2191685204621611f,
-  7.2288186904958804f, 7.2384047393250785f,
-  7.2479275134435852f, 7.2573878426926521f,
-  7.2667865406949010f, 7.2761244052742375f,
-  7.2854022188622487f, 7.2946207488916270f,
-  7.3037807481771030f, 7.3128829552843557f,
-  7.3219280948873626f, 7.3309168781146167f,
-  7.3398500028846243f, 7.3487281542310771f,
-  7.3575520046180837f, 7.3663222142458160f,
-  7.3750394313469245f, 7.3837042924740519f,
-  7.3923174227787606f, 7.4008794362821843f,
-  7.4093909361377017f, 7.4178525148858982f,
-  7.4262647547020979f, 7.4346282276367245f,
-  7.4429434958487279f, 7.4512111118323289f,
-  7.4594316186372973f, 7.4676055500829976f,
-  7.4757334309663976f, 7.4838157772642563f,
-  7.4918530963296747f, 7.4998458870832056f,
-  7.5077946401986963f, 7.5156998382840427f,
-  7.5235619560570130f, 7.5313814605163118f,
-  7.5391588111080309f, 7.5468944598876364f,
-  7.5545888516776376f, 7.5622424242210728f,
-  7.5698556083309478f, 7.5774288280357486f,
-  7.5849625007211560f, 7.5924570372680806f,
-  7.5999128421871278f, 7.6073303137496104f,
-  7.6147098441152083f, 7.6220518194563764f,
-  7.6293566200796094f, 7.6366246205436487f,
-  7.6438561897747243f, 7.6510516911789281f,
-  7.6582114827517946f, 7.6653359171851764f,
-  7.6724253419714951f, 7.6794800995054464f,
-  7.6865005271832185f, 7.6934869574993252f,
-  7.7004397181410917f, 7.7073591320808825f,
-  7.7142455176661224f, 7.7210991887071855f,
-  7.7279204545631987f, 7.7347096202258383f,
-  7.7414669864011464f, 7.7481928495894605f,
-  7.7548875021634682f, 7.7615512324444795f,
-  7.7681843247769259f, 7.7747870596011736f,
-  7.7813597135246599f, 7.7879025593914317f,
-  7.7944158663501061f, 7.8008998999203047f,
-  7.8073549220576037f, 7.8137811912170374f,
-  7.8201789624151878f, 7.8265484872909150f,
-  7.8328900141647412f, 7.8392037880969436f,
-  7.8454900509443747f, 7.8517490414160571f,
-  7.8579809951275718f, 7.8641861446542797f,
-  7.8703647195834047f, 7.8765169465649993f,
-  7.8826430493618415f, 7.8887432488982591f,
-  7.8948177633079437f, 7.9008668079807486f,
-  7.9068905956085187f, 7.9128893362299619f,
-  7.9188632372745946f, 7.9248125036057812f,
-  7.9307373375628866f, 7.9366379390025709f,
-  7.9425145053392398f, 7.9483672315846778f,
-  7.9541963103868749f, 7.9600019320680805f,
-  7.9657842846620869f, 7.9715435539507719f,
-  7.9772799234999167f, 7.9829935746943103f,
-  7.9886846867721654f, 7.9943534368588577f
+// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
+// Obtained in Python with:
+// a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)]
+// print(',\n'.join(['  '+','.join(v)
+//       for v in batched([i.rjust(9) for i in a],7)]))
+const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+         0,        0,  8388608, 13295629, 16777216, 19477745, 21684237,
+  23549800, 25165824, 26591258, 27866353, 29019816, 30072845, 31041538,
+  31938408, 32773374, 33554432, 34288123, 34979866, 35634199, 36254961,
+  36845429, 37408424, 37946388, 38461453, 38955489, 39430146, 39886887,
+  40327016, 40751698, 41161982, 41558811, 41943040, 42315445, 42676731,
+  43027545, 43368474, 43700062, 44022807, 44337167, 44643569, 44942404,
+  45234037, 45518808, 45797032, 46069003, 46334996, 46595268, 46850061,
+  47099600, 47344097, 47583753, 47818754, 48049279, 48275495, 48497560,
+  48715624, 48929828, 49140306, 49347187, 49550590, 49750631, 49947419,
+  50141058, 50331648, 50519283, 50704053, 50886044, 51065339, 51242017,
+  51416153, 51587818, 51757082, 51924012, 52088670, 52251118, 52411415,
+  52569616, 52725775, 52879946, 53032177, 53182516, 53331012, 53477707,
+  53622645, 53765868, 53907416, 54047327, 54185640, 54322389, 54457611,
+  54591338, 54723604, 54854440, 54983876, 55111943, 55238669, 55364082,
+  55488208, 55611074, 55732705, 55853126, 55972361, 56090432, 56207362,
+  56323174, 56437887, 56551524, 56664103, 56775645, 56886168, 56995691,
+  57104232, 57211808, 57318436, 57424133, 57528914, 57632796, 57735795,
+  57837923, 57939198, 58039632, 58139239, 58238033, 58336027, 58433234,
+  58529666, 58625336, 58720256, 58814437, 58907891, 59000628, 59092661,
+  59183999, 59274652, 59364632, 59453947, 59542609, 59630625, 59718006,
+  59804761, 59890898, 59976426, 60061354, 60145690, 60229443, 60312620,
+  60395229, 60477278, 60558775, 60639726, 60720140, 60800023, 60879382,
+  60958224, 61036555, 61114383, 61191714, 61268554, 61344908, 61420785,
+  61496188, 61571124, 61645600, 61719620, 61793189, 61866315, 61939001,
+  62011253, 62083076, 62154476, 62225457, 62296024, 62366182, 62435935,
+  62505289, 62574248, 62642816, 62710997, 62778797, 62846219, 62913267,
+  62979946, 63046260, 63112212, 63177807, 63243048, 63307939, 63372484,
+  63436687, 63500551, 63564080, 63627277, 63690146, 63752690, 63814912,
+  63876816, 63938405, 63999682, 64060650, 64121313, 64181673, 64241734,
+  64301498, 64360969, 64420148, 64479040, 64537646, 64595970, 64654014,
+  64711782, 64769274, 64826495, 64883447, 64940132, 64996553, 65052711,
+  65108611, 65164253, 65219641, 65274776, 65329662, 65384299, 65438691,
+  65492840, 65546747, 65600416, 65653847, 65707044, 65760008, 65812741,
+  65865245, 65917522, 65969575, 66021404, 66073013, 66124403, 66175575,
+  66226531, 66277275, 66327806, 66378127, 66428240, 66478146, 66527847,
+  66577345, 66626641, 66675737, 66724635, 66773336, 66821842, 66870154,
+  66918274, 66966204, 67013944, 67061497
 };

-const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
-  0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
-  8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
-  24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
-  43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
-  64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
-  86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
-  110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
-  134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
-  160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
-  186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
-  212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
-  240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
-  268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
-  296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
-  325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
-  354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
-  384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
-  413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
-  444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
-  474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
-  505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
-  536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
-  568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
-  600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
-  632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
-  664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
-  696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
-  729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
-  762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
-  795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
-  828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
-  862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
-  896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
-  929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
-  963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
-  998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
-  1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
-  1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
-  1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
-  1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
-  1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
-  1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
-  1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
-  1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
-  1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
-  1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
-  1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
-  1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
-  1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
-  1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
-  1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
-  1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
-  1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
-  1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
-  1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
-  1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
-  1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
-  1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
-  1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
-  1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
-  1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
-  1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
-  1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
-  2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
+// lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS).
+// Obtained in Python with:
+// a=[ "%d"%i if i<(1<<32) else "%dull"%i
+//     for i in [ round((1<<LOG_2_PRECISION_BITS)*math.log2(i)*i) if i
+//     else 0 for i in range(256)]]
+// print(',\n '.join([','.join(v) for v in batched([i.rjust(15)
+//                      for i in a],4)]))
+const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
+               0,              0,       16777216,       39886887,
+        67108864,       97388723,      130105423,      164848600,
+       201326592,      239321324,      278663526,      319217973,
+       360874141,      403539997,      447137711,      491600606,
+       536870912,      582898099,      629637592,      677049776,
+       725099212,      773754010,      822985323,      872766924,
+       923074875,      973887230,     1025183802,     1076945958,
+      1129156447,     1181799249,     1234859451,     1288323135,
+      1342177280,     1396409681,     1451008871,     1505964059,
+      1561265072,     1616902301,     1672866655,     1729149526,
+      1785742744,     1842638548,     1899829557,     1957308741,
+      2015069397,     2073105127,     2131409817,  2189977618ull,
+   2248802933ull,  2307880396ull,  2367204859ull,  2426771383ull,
+   2486575220ull,  2546611805ull,  2606876748ull,  2667365819ull,
+   2728074942ull,  2789000187ull,  2850137762ull,  2911484006ull,
+   2973035382ull,  3034788471ull,  3096739966ull,  3158886666ull,
+   3221225472ull,  3283753383ull,  3346467489ull,  3409364969ull,
+   3472443085ull,  3535699182ull,  3599130679ull,  3662735070ull,
+   3726509920ull,  3790452862ull,  3854561593ull,  3918833872ull,
+   3983267519ull,  4047860410ull,  4112610476ull,  4177515704ull,
+   4242574127ull,  4307783833ull,  4373142952ull,  4438649662ull,
+   4504302186ull,  4570098787ull,  4636037770ull,  4702117480ull,
+   4768336298ull,  4834692645ull,  4901184974ull,  4967811774ull,
+   5034571569ull,  5101462912ull,  5168484389ull,  5235634615ull,
+   5302912235ull,  5370315922ull,  5437844376ull,  5505496324ull,
+   5573270518ull,  5641165737ull,  5709180782ull,  5777314477ull,
+   5845565671ull,  5913933235ull,  5982416059ull,  6051013057ull,
+   6119723161ull,  6188545324ull,  6257478518ull,  6326521733ull,
+   6395673979ull,  6464934282ull,  6534301685ull,  6603775250ull,
+   6673354052ull,  6743037185ull,  6812823756ull,  6882712890ull,
+   6952703725ull,  7022795412ull,  7092987118ull,  7163278025ull,
+   7233667324ull,  7304154222ull,  7374737939ull,  7445417707ull,
+   7516192768ull,  7587062379ull,  7658025806ull,  7729082328ull,
+   7800231234ull,  7871471825ull,  7942803410ull,  8014225311ull,
+   8085736859ull,  8157337394ull,  8229026267ull,  8300802839ull,
+   8372666477ull,  8444616560ull,  8516652476ull,  8588773618ull,
+   8660979393ull,  8733269211ull,  8805642493ull,  8878098667ull,
+   8950637170ull,  9023257446ull,  9095958945ull,  9168741125ull,
+   9241603454ull,  9314545403ull,  9387566451ull,  9460666086ull,
+   9533843800ull,  9607099093ull,  9680431471ull,  9753840445ull,
+   9827325535ull,  9900886263ull,  9974522161ull, 10048232765ull,
+  10122017615ull, 10195876260ull, 10269808253ull, 10343813150ull,
+  10417890516ull, 10492039919ull, 10566260934ull, 10640553138ull,
+  10714916116ull, 10789349456ull, 10863852751ull, 10938425600ull,
+  11013067604ull, 11087778372ull, 11162557513ull, 11237404645ull,
+  11312319387ull, 11387301364ull, 11462350205ull, 11537465541ull,
+  11612647010ull, 11687894253ull, 11763206912ull, 11838584638ull,
+  11914027082ull, 11989533899ull, 12065104750ull, 12140739296ull,
+  12216437206ull, 12292198148ull, 12368021795ull, 12443907826ull,
+  12519855920ull, 12595865759ull, 12671937032ull, 12748069427ull,
+  12824262637ull, 12900516358ull, 12976830290ull, 13053204134ull,
+  13129637595ull, 13206130381ull, 13282682202ull, 13359292772ull,
+  13435961806ull, 13512689025ull, 13589474149ull, 13666316903ull,
+  13743217014ull, 13820174211ull, 13897188225ull, 13974258793ull,
+  14051385649ull, 14128568535ull, 14205807192ull, 14283101363ull,
+  14360450796ull, 14437855239ull, 14515314443ull, 14592828162ull,
+  14670396151ull, 14748018167ull, 14825693972ull, 14903423326ull,
+  14981205995ull, 15059041743ull, 15136930339ull, 15214871554ull,
+  15292865160ull, 15370910930ull, 15449008641ull, 15527158071ull,
+  15605359001ull, 15683611210ull, 15761914485ull, 15840268608ull,
+  15918673369ull, 15997128556ull, 16075633960ull, 16154189373ull,
+  16232794589ull, 16311449405ull, 16390153617ull, 16468907026ull,
+  16547709431ull, 16626560636ull, 16705460444ull, 16784408661ull,
+  16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull
 };

 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
@ -326,23 +246,19 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
 };

-static float FastSLog2Slow_C(uint32_t v) {
+static uint64_t FastSLog2Slow_C(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    const uint64_t orig_v = v;
+    uint64_t correction;
 #if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
    // use clz if available
-    const int log_cnt = BitsLog2Floor(v) - 7;
+    const uint64_t log_cnt = BitsLog2Floor(v) - 7;
    const uint32_t y = 1 << log_cnt;
-    int correction = 0;
-    const float v_f = (float)v;
-    const uint32_t orig_v = v;
    v >>= log_cnt;
 #else
-    int log_cnt = 0;
+    uint64_t log_cnt = 0;
    uint32_t y = 1;
-    int correction = 0;
-    const float v_f = (float)v;
-    const uint32_t orig_v = v;
    do {
      ++log_cnt;
      v = v >> 1;
@ -354,45 +270,43 @@ static float FastSLog2Slow_C(uint32_t v) {
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    // The correction factor: log(1 + d) ~ d for very small values of d, so
    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
-    // LOG_2_RECIPROCAL ~ 23/16
-    correction = (23 * (orig_v & (y - 1))) >> 4;
-    return v_f * (kLog2Table[v] + log_cnt) + correction;
+    correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
+    return orig_v * (kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS)) +
+           correction;
  } else {
-    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+    return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
  }
 }

-static float FastLog2Slow_C(uint32_t v) {
+static uint32_t FastLog2Slow_C(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    const uint32_t orig_v = v;
+    uint32_t log_2;
 #if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
    // use clz if available
-    const int log_cnt = BitsLog2Floor(v) - 7;
+    const uint32_t log_cnt = BitsLog2Floor(v) - 7;
    const uint32_t y = 1 << log_cnt;
-    const uint32_t orig_v = v;
-    double log_2;
    v >>= log_cnt;
 #else
-    int log_cnt = 0;
+    uint32_t log_cnt = 0;
    uint32_t y = 1;
-    const uint32_t orig_v = v;
-    double log_2;
    do {
      ++log_cnt;
      v = v >> 1;
      y = y << 1;
    } while (v >= LOG_LOOKUP_IDX_MAX);
 #endif
-    log_2 = kLog2Table[v] + log_cnt;
+    log_2 = kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS);
    if (orig_v >= APPROX_LOG_MAX) {
      // Since the division is still expensive, add this correction factor only
      // for large values of 'v'.
-      const int correction = (23 * (orig_v & (y - 1))) >> 4;
-      log_2 += (double)correction / orig_v;
+      const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
+      log_2 += (uint32_t)DivRound(correction, orig_v);
    }
-    return (float)log_2;
+    return log_2;
  } else {
-    return (float)(LOG_2_RECIPROCAL * log((double)v));
+    return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
  }
 }

@ -400,29 +314,30 @@ static float FastLog2Slow_C(uint32_t v) {
 // Methods to calculate Entropy (Shannon).

 // Compute the combined Shannon entropy for distributions {X} and {X+Y}
-static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
+static uint64_t CombinedShannonEntropy_C(const uint32_t X[256],
+                                         const uint32_t Y[256]) {
  int i;
-  float retval = 0.f;
-  int sumX = 0, sumXY = 0;
+  uint64_t retval = 0;
+  uint32_t sumX = 0, sumXY = 0;
  for (i = 0; i < 256; ++i) {
-    const int x = X[i];
+    const uint32_t x = X[i];
    if (x != 0) {
-      const int xy = x + Y[i];
+      const uint32_t xy = x + Y[i];
      sumX += x;
-      retval -= VP8LFastSLog2(x);
+      retval += VP8LFastSLog2(x);
      sumXY += xy;
-      retval -= VP8LFastSLog2(xy);
+      retval += VP8LFastSLog2(xy);
    } else if (Y[i] != 0) {
      sumXY += Y[i];
-      retval -= VP8LFastSLog2(Y[i]);
+      retval += VP8LFastSLog2(Y[i]);
    }
  }
-  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
+  retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
  return retval;
 }

 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
-  entropy->entropy = 0.;
+  entropy->entropy = 0;
  entropy->sum = 0;
  entropy->nonzeros = 0;
  entropy->max_val = 0;
@ -440,13 +355,13 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
      entropy->sum += array[i];
      entropy->nonzero_code = i;
      ++entropy->nonzeros;
-      entropy->entropy -= VP8LFastSLog2(array[i]);
+      entropy->entropy += VP8LFastSLog2(array[i]);
      if (entropy->max_val < array[i]) {
        entropy->max_val = array[i];
      }
    }
  }
-  entropy->entropy += VP8LFastSLog2(entropy->sum);
+  entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
 }

 static WEBP_INLINE void GetEntropyUnrefinedHelper(
@ -459,7 +374,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
    bit_entropy->sum += (*val_prev) * streak;
    bit_entropy->nonzeros += streak;
    bit_entropy->nonzero_code = *i_prev;
-    bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
+    bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
    if (bit_entropy->max_val < *val_prev) {
      bit_entropy->max_val = *val_prev;
    }
@ -491,7 +406,7 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
  }
  GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);

-  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+  bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
 }

 static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
@ -514,7 +429,7 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
  }
  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);

-  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+  bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
 }

 //------------------------------------------------------------------------------
@ -577,7 +492,7 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,

 void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
                                     int tile_width, int tile_height,
-                                     int green_to_red, int histo[]) {
+                                     int green_to_red, uint32_t histo[]) {
  while (tile_height-- > 0) {
    int x;
    for (x = 0; x < tile_width; ++x) {
@ -590,7 +505,7 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
 void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
                                      int tile_width, int tile_height,
                                      int green_to_blue, int red_to_blue,
-                                      int histo[]) {
+                                      uint32_t histo[]) {
  while (tile_height-- > 0) {
    int x;
    for (x = 0; x < tile_width; ++x) {
@ -778,7 +693,7 @@ VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
 VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;

 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
-VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;

 VP8LCostFunc VP8LExtraCost;
 VP8LCostCombinedFunc VP8LExtraCostCombined;
--- a/src/dsp/lossless_enc_mips32.c
+++ b/src/dsp/lossless_enc_mips32.c
@ -23,12 +23,12 @@
 #include <stdlib.h>
 #include <string.h>

-static float FastSLog2Slow_MIPS32(uint32_t v) {
+static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
-    uint32_t log_cnt, y, correction;
+    uint32_t log_cnt, y;
+    uint64_t correction;
    const int c24 = 24;
-    const float v_f = (float)v;
    uint32_t temp;

    // Xf = 256 = 2^8
@ -49,22 +49,23 @@ static float FastSLog2Slow_MIPS32(uint32_t v) {
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    // The correction factor: log(1 + d) ~ d for very small values of d, so
    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
-    // LOG_2_RECIPROCAL ~ 23/16

    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
-    correction = (23 * (v & (y - 1))) >> 4;
-    return v_f * (kLog2Table[temp] + log_cnt) + correction;
+    correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
+    return (uint64_t)v * (kLog2Table[temp] +
+                          ((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +
+           correction;
  } else {
-    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+    return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
  }
 }

-static float FastLog2Slow_MIPS32(uint32_t v) {
+static uint32_t FastLog2Slow_MIPS32(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y;
    const int c24 = 24;
-    double log_2;
+    uint32_t log_2;
    uint32_t temp;

    __asm__ volatile(
@ -78,17 +79,16 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
      : [c24]"r"(c24), [v]"r"(v)
    );

-    log_2 = kLog2Table[temp] + log_cnt;
+    log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS);
    if (v >= APPROX_LOG_MAX) {
      // Since the division is still expensive, add this correction factor only
      // for large values of 'v'.
-
-      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
-      log_2 += (double)correction / v;
+      const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
+      log_2 += (uint32_t)DivRound(correction, v);
    }
-    return (float)log_2;
+    return log_2;
  } else {
-    return (float)(LOG_2_RECIPROCAL * log((double)v));
+    return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
  }
 }

@ -227,7 +227,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
    bit_entropy->sum += (*val_prev) * streak;
    bit_entropy->nonzeros += streak;
    bit_entropy->nonzero_code = *i_prev;
-    bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
+    bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
    if (bit_entropy->max_val < *val_prev) {
      bit_entropy->max_val = *val_prev;
    }
@ -259,7 +259,7 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
  }
  GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);

-  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+  bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
 }

 static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
@ -282,7 +282,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
  }
  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);

-  entropy->entropy += VP8LFastSLog2(entropy->sum);
+  entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
 }

 #define ASM_START                                       \
--- a/src/dsp/lossless_enc_mips_dsp_r2.c
+++ b/src/dsp/lossless_enc_mips_dsp_r2.c
@ -171,13 +171,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
  return (new_blue & 0xff);
 }

-static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
-                                                 int stride,
-                                                 int tile_width,
-                                                 int tile_height,
-                                                 int green_to_blue,
-                                                 int red_to_blue,
-                                                 int histo[]) {
+static void CollectColorBlueTransforms_MIPSdspR2(
+    const uint32_t* argb, int stride, int tile_width, int tile_height,
+    int green_to_blue, int red_to_blue, uint32_t histo[]) {
  const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
  const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
  const uint32_t mask = 0xff00ffu;
@ -226,11 +222,10 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
 }

 static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
-                                                int stride,
-                                                int tile_width,
+                                                int stride, int tile_width,
                                                int tile_height,
                                                int green_to_red,
-                                                int histo[]) {
+                                                uint32_t histo[]) {
  const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
  while (tile_height-- > 0) {
    int x;
--- a/src/dsp/lossless_enc_sse2.c
+++ b/src/dsp/lossless_enc_sse2.c
@ -82,7 +82,7 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
 static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
                                            int tile_width, int tile_height,
                                            int green_to_blue, int red_to_blue,
-                                            int histo[]) {
+                                            uint32_t histo[]) {
  const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0);
  const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue));
  const __m128i mask_g = _mm_set1_epi32(0x00ff00);  // green mask
@ -128,7 +128,7 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,

 static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
                                           int tile_width, int tile_height,
-                                           int green_to_red, int histo[]) {
+                                           int green_to_red, uint32_t histo[]) {
  const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
  const __m128i mask_g = _mm_set1_epi32(0x00ff00);  // green mask
  const __m128i mask = _mm_set1_epi32(0xff);
@ -232,15 +232,13 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
 //------------------------------------------------------------------------------
 // Entropy

-// TODO(https://crbug.com/webp/499): this function produces different results
-// from the C code due to use of double/float resulting in output differences
-// when compared to -noasm.
-#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)

-static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
+static uint64_t CombinedShannonEntropy_SSE2(const uint32_t X[256],
+                                            const uint32_t Y[256]) {
  int i;
-  float retval = 0.f;
-  int sumX = 0, sumXY = 0;
+  uint64_t retval = 0;
+  uint32_t sumX = 0, sumXY = 0;
  const __m128i zero = _mm_setzero_si128();

  for (i = 0; i < 256; i += 16) {
@ -260,19 +258,19 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
    int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
    while (my) {
      const int32_t j = BitsCtz(my);
-      int xy;
+      uint32_t xy;
      if ((mx >> j) & 1) {
        const int x = X[i + j];
        sumXY += x;
-        retval -= VP8LFastSLog2(x);
+        retval += VP8LFastSLog2(x);
      }
      xy = X[i + j] + Y[i + j];
      sumX += xy;
-      retval -= VP8LFastSLog2(xy);
+      retval += VP8LFastSLog2(xy);
      my &= my - 1;
    }
  }
-  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
+  retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
  return retval;
 }

--- a/src/dsp/lossless_enc_sse41.c
+++ b/src/dsp/lossless_enc_sse41.c
@ -98,7 +98,7 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
 static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
                                             int tile_width, int tile_height,
                                             int green_to_blue, int red_to_blue,
-                                             int histo[]) {
+                                             uint32_t histo[]) {
  const __m128i mult =
      MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
  const __m128i perm =
@ -143,8 +143,8 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,

 static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
                                            int tile_width, int tile_height,
-                                            int green_to_red, int histo[]) {
-
+                                            int green_to_red,
+                                            uint32_t histo[]) {
  const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
  const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
  if (tile_width >= 4) {
--- a/src/dsp/upsampling_neon.c
+++ b/src/dsp/upsampling_neon.c
@ -189,57 +189,57 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
  }                                                                     \
 }

-#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                       \
-static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,    \
-                      const uint8_t* top_u, const uint8_t* top_v,       \
-                      const uint8_t* cur_u, const uint8_t* cur_v,       \
-                      uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
-  int block;                                                            \
-  /* 16 byte aligned array to cache reconstructed u and v */            \
-  uint8_t uv_buf[2 * 32 + 15];                                          \
-  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);     \
-  const int uv_len = (len + 1) >> 1;                                    \
-  /* 9 pixels must be read-able for each block */                       \
-  const int num_blocks = (uv_len - 1) >> 3;                             \
-  const int leftover = uv_len - num_blocks * 8;                         \
-  const int last_pos = 1 + 16 * num_blocks;                             \
-                                                                        \
-  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                  \
-  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                  \
-                                                                        \
-  const int16x4_t coeff1 = vld1_s16(kCoeffs1);                          \
-  const int16x8_t R_Rounder = vdupq_n_s16(-14234);                      \
-  const int16x8_t G_Rounder = vdupq_n_s16(8708);                        \
-  const int16x8_t B_Rounder = vdupq_n_s16(-17685);                      \
-                                                                        \
-  /* Treat the first pixel in regular way */                            \
-  assert(top_y != NULL);                                                \
-  {                                                                     \
-    const int u0 = (top_u[0] + u_diag) >> 1;                            \
-    const int v0 = (top_v[0] + v_diag) >> 1;                            \
-    VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst);                         \
-  }                                                                     \
-  if (bottom_y != NULL) {                                               \
-    const int u0 = (cur_u[0] + u_diag) >> 1;                            \
-    const int v0 = (cur_v[0] + v_diag) >> 1;                            \
-    VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst);                   \
-  }                                                                     \
-                                                                        \
-  for (block = 0; block < num_blocks; ++block) {                        \
-    UPSAMPLE_16PIXELS(top_u, cur_u, r_uv);                              \
-    UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16);                         \
-    CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv,                    \
-                  top_dst, bottom_dst, 16 * block + 1, 16);             \
-    top_u += 8;                                                         \
-    cur_u += 8;                                                         \
-    top_v += 8;                                                         \
-    cur_v += 8;                                                         \
-  }                                                                     \
-                                                                        \
-  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv);                    \
-  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16);               \
-  CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv,          \
-                top_dst, bottom_dst, last_pos, len - last_pos);         \
+// Generates FUNC_NAME, which upsamples and converts a top row and an optional
+// bottom row. The first pixel of each row is converted on its own, full blocks
+// go through UPSAMPLE_16PIXELS + CONVERT2RGB_8, and what is left goes through
+// UPSAMPLE_LAST_BLOCK + CONVERT2RGB_1. 'top_y' must be non-NULL (asserted); the
+// first bottom pixel is only converted when 'bottom_y' is non-NULL.
+#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                              \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int block;                                                                   \
+  /* 16-byte aligned scratch area caching the reconstructed u and v: */        \
+  /* the u calls below use r_uv, the v calls use r_uv + 16. */                 \
+  uint8_t uv_buf[2 * 32 + 15];                                                 \
+  /* round uv_buf + 15 down to a 16-byte boundary, mask in uintptr_t */        \
+  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \
+  const int uv_len = (len + 1) >> 1;                                           \
+  /* 9 pixels must be read-able for each block */                              \
+  const int num_blocks = (uv_len - 1) >> 3;                                    \
+  const int leftover = uv_len - num_blocks * 8;                                \
+  const int last_pos = 1 + 16 * num_blocks;                                    \
+                                                                               \
+  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                         \
+  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                         \
+                                                                               \
+  const int16x4_t coeff1 = vld1_s16(kCoeffs1);                                 \
+  const int16x8_t R_Rounder = vdupq_n_s16(-14234);                             \
+  const int16x8_t G_Rounder = vdupq_n_s16(8708);                               \
+  const int16x8_t B_Rounder = vdupq_n_s16(-17685);                             \
+                                                                               \
+  /* Treat the first pixel in regular way */                                   \
+  assert(top_y != NULL);                                                       \
+  {                                                                            \
+    const int u0 = (top_u[0] + u_diag) >> 1;                                   \
+    const int v0 = (top_v[0] + v_diag) >> 1;                                   \
+    VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst);                                \
+  }                                                                            \
+  if (bottom_y != NULL) {                                                      \
+    const int u0 = (cur_u[0] + u_diag) >> 1;                                   \
+    const int v0 = (cur_v[0] + v_diag) >> 1;                                   \
+    VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst);                          \
+  }                                                                            \
+                                                                               \
+  for (block = 0; block < num_blocks; ++block) {                               \
+    UPSAMPLE_16PIXELS(top_u, cur_u, r_uv);                                     \
+    UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16);                                \
+    CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv,                           \
+                  top_dst, bottom_dst, 16 * block + 1, 16);                    \
+    top_u += 8;                                                                \
+    cur_u += 8;                                                                \
+    top_v += 8;                                                                \
+    cur_v += 8;                                                                \
+  }                                                                            \
+                                                                               \
+  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv);                           \
+  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16);                      \
+  CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv,                 \
+                top_dst, bottom_dst, last_pos, len - last_pos);                \
 }

 // NEON variants of the fancy upsampler.
--- a/src/dsp/upsampling_sse41.c
+++ b/src/dsp/upsampling_sse41.c
@ -123,7 +123,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
  int uv_pos, pos;                                                             \
  /* 16byte-aligned array to cache reconstructed u and v */                    \
  uint8_t uv_buf[14 * 32 + 15] = { 0 };                                        \
-  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);             \
+  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15);  \
  uint8_t* const r_v = r_u + 32;                                               \
                                                                               \
  assert(top_y != NULL);                                                       \
--- a/src/enc/alpha_enc.c
+++ b/src/enc/alpha_enc.c
@ -276,6 +276,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
      stats->lossless_features = best.stats.lossless_features;
      stats->histogram_bits = best.stats.histogram_bits;
      stats->transform_bits = best.stats.transform_bits;
+      stats->cross_color_transform_bits = best.stats.cross_color_transform_bits;
      stats->cache_bits = best.stats.cache_bits;
      stats->palette_size = best.stats.palette_size;
      stats->lossless_size = best.stats.lossless_size;
--- a/src/enc/backward_references_cost_enc.c
+++ b/src/enc/backward_references_cost_enc.c
@ -15,7 +15,7 @@
 //

 #include <assert.h>
-#include <float.h>
+#include <string.h>

 #include "src/dsp/lossless_common.h"
 #include "src/enc/backward_references_enc.h"
@ -31,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                                      const PixOrCopy v);

+// Per-symbol cost tables read by the Get*Cost() helpers. Entries are unsigned
+// integers; presumably fixed-point with LOG_2_PRECISION_BITS fractional bits,
+// matching the shift the helpers apply to extra_bits -- TODO confirm.
+// literal_ is indexed by (v >> 8) & 0xff for literals, VALUES_IN_BYTE + code
+// for lengths and VALUES_IN_BYTE + NUM_LENGTH_CODES + idx for cache entries.
 typedef struct {
-  float alpha_[VALUES_IN_BYTE];
-  float red_[VALUES_IN_BYTE];
-  float blue_[VALUES_IN_BYTE];
-  float distance_[NUM_DISTANCE_CODES];
-  float* literal_;
+  uint32_t alpha_[VALUES_IN_BYTE];
+  uint32_t red_[VALUES_IN_BYTE];
+  uint32_t blue_[VALUES_IN_BYTE];
+  uint32_t distance_[NUM_DISTANCE_CODES];
+  uint32_t* literal_;
 } CostModel;

 static void ConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const uint32_t population_counts[], float output[]) {
+    int num_symbols, const uint32_t population_counts[], uint32_t output[]) {
  uint32_t sum = 0;
  int nonzeros = 0;
  int i;
@ -52,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
  if (nonzeros <= 1) {
    memset(output, 0, num_symbols * sizeof(*output));
  } else {
-    const float logsum = VP8LFastLog2(sum);
+    const uint32_t logsum = VP8LFastLog2(sum);
    for (i = 0; i < num_symbols; ++i) {
      output[i] = logsum - VP8LFastLog2(population_counts[i]);
    }
@ -93,47 +93,47 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
  return ok;
 }

-static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
-  return m->alpha_[v >> 24] +
-         m->red_[(v >> 16) & 0xff] +
-         m->literal_[(v >> 8) & 0xff] +
-         m->blue_[v & 0xff];
+// Returns the summed cost of coding 'v' as a literal: one table lookup per
+// byte of 'v' (alpha_, red_, literal_, blue_, from the top byte down). The
+// first term is widened to int64_t so the whole sum is computed in 64 bits.
+static WEBP_INLINE int64_t GetLiteralCost(const CostModel* const m,
+                                          uint32_t v) {
+  return (int64_t)m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] +
+         m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff];
 }

-static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
+// Returns the cost stored for color-cache index 'idx'. Cache entries live in
+// literal_ after the VALUES_IN_BYTE literal slots and the NUM_LENGTH_CODES
+// length slots.
+static WEBP_INLINE int64_t GetCacheCost(const CostModel* const m,
+                                        uint32_t idx) {
   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
-  return m->literal_[literal_idx];
+  return (int64_t)m->literal_[literal_idx];
 }

-static WEBP_INLINE float GetLengthCost(const CostModel* const m,
-                                       uint32_t length) {
+// Returns the cost of coding 'length': the table cost of its prefix code plus
+// its number of extra bits. The extra bits are shifted left by
+// LOG_2_PRECISION_BITS, presumably to match the scale of the table entries.
+static WEBP_INLINE int64_t GetLengthCost(const CostModel* const m,
+                                         uint32_t length) {
   int code, extra_bits;
   VP8LPrefixEncodeBits(length, &code, &extra_bits);
-  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
+  return (int64_t)m->literal_[VALUES_IN_BYTE + code] +
+         ((int64_t)extra_bits << LOG_2_PRECISION_BITS);
 }

-static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
-                                         uint32_t distance) {
+// Returns the cost of coding 'distance': the distance_ table cost of its
+// prefix code plus its number of extra bits. The extra bits are shifted left
+// by LOG_2_PRECISION_BITS, presumably to match the scale of the table entries.
+static WEBP_INLINE int64_t GetDistanceCost(const CostModel* const m,
+                                           uint32_t distance) {
   int code, extra_bits;
   VP8LPrefixEncodeBits(distance, &code, &extra_bits);
-  return m->distance_[code] + extra_bits;
+  return (int64_t)m->distance_[code] +
+         ((int64_t)extra_bits << LOG_2_PRECISION_BITS);
 }

 static WEBP_INLINE void AddSingleLiteralWithCostModel(
    const uint32_t* const argb, VP8LColorCache* const hashers,
    const CostModel* const cost_model, int idx, int use_color_cache,
-    float prev_cost, float* const cost, uint16_t* const dist_array) {
-  float cost_val = prev_cost;
+    int64_t prev_cost, int64_t* const cost, uint16_t* const dist_array) {
+  int64_t cost_val = prev_cost;
  const uint32_t color = argb[idx];
  const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
  if (ix >= 0) {
    // use_color_cache is true and hashers contains color
-    const float mul0 = 0.68f;
-    cost_val += GetCacheCost(cost_model, ix) * mul0;
+    cost_val += DivRound(GetCacheCost(cost_model, ix) * 68, 100);
  } else {
-    const float mul1 = 0.82f;
    if (use_color_cache) VP8LColorCacheInsert(hashers, color);
-    cost_val += GetLiteralCost(cost_model, color) * mul1;
+    cost_val += DivRound(GetLiteralCost(cost_model, color) * 82, 100);
  }
  if (cost[idx] > cost_val) {
    cost[idx] = cost_val;
@ -163,7 +163,7 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
 // therefore no overlapping intervals.
 typedef struct CostInterval CostInterval;
 struct CostInterval {
-  float cost_;
+  int64_t cost_;
  int start_;
  int end_;
  int index_;
@ -173,7 +173,7 @@ struct CostInterval {

 // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
 typedef struct {
-  float cost_;
+  int64_t cost_;  // Cached length cost shared by the whole interval.
   int start_;
   int end_;       // Exclusive.
 } CostCacheInterval;
@ -188,8 +188,9 @@ typedef struct {
  int count_;  // The number of stored intervals.
  CostCacheInterval* cache_intervals_;
  size_t cache_intervals_size_;
-  float cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
-  float* costs_;
+  // Contains the GetLengthCost(cost_model, k).
+  int64_t cost_cache_[MAX_LENGTH];
+  int64_t* costs_;
  uint16_t* dist_array_;
  // Most of the time, we only need few intervals -> use a free-list, to avoid
  // fragmentation with small allocs in most common cases.
@ -298,7 +299,7 @@ static int CostManagerInit(CostManager* const manager,
    cur->end_ = 1;
    cur->cost_ = manager->cost_cache_[0];
    for (i = 1; i < cost_cache_size; ++i) {
-      const float cost_val = manager->cost_cache_[i];
+      const int64_t cost_val = manager->cost_cache_[i];
      if (cost_val != cur->cost_) {
        ++cur;
        // Initialize an interval.
@ -311,13 +312,15 @@ static int CostManagerInit(CostManager* const manager,
           manager->cache_intervals_size_);
  }

-  manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
+  manager->costs_ =
+      (int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
  if (manager->costs_ == NULL) {
    CostManagerClear(manager);
    return 0;
  }
-  // Set the initial costs_ high for every pixel as we will keep the minimum.
-  for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;
+  // Set the initial costs_ to INT64_MAX for every pixel as we will keep the
+  // minimum.
+  for (i = 0; i < pix_count; ++i) manager->costs_[i] = WEBP_INT64_MAX;

  return 1;
 }
@ -325,7 +328,7 @@ static int CostManagerInit(CostManager* const manager,
 // Given the cost and the position that define an interval, update the cost at
 // pixel 'i' if it is smaller than the previously computed value.
 static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
-                                   int position, float cost) {
+                                   int position, int64_t cost) {
  const int k = i - position;
  assert(k >= 0 && k < MAX_LENGTH);

@ -339,7 +342,7 @@ static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
 // all the pixels between 'start' and 'end' excluded.
 static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager,
                                               int start, int end, int position,
-                                              float cost) {
+                                              int64_t cost) {
   int i;
+  // 'end' is exclusive: UpdateCost() runs for pixels start .. end - 1.
   for (i = start; i < end; ++i) UpdateCost(manager, i, position, cost);
 }
@ -424,7 +427,7 @@ static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
 // interval_in as a hint. The intervals are sorted by start_ value.
 static WEBP_INLINE void InsertInterval(CostManager* const manager,
                                       CostInterval* const interval_in,
-                                       float cost, int position, int start,
+                                       int64_t cost, int position, int start,
                                       int end) {
  CostInterval* interval_new;

@ -463,7 +466,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
 // If handling the interval or one of its subintervals becomes too heavy, its
 // contribution is added to the costs right away.
 static WEBP_INLINE void PushInterval(CostManager* const manager,
-                                     float distance_cost, int position,
+                                     int64_t distance_cost, int position,
                                     int len) {
  size_t i;
  CostInterval* interval = manager->head_;
@ -478,7 +481,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
    int j;
    for (j = position; j < position + len; ++j) {
      const int k = j - position;
-      float cost_tmp;
+      int64_t cost_tmp;
      assert(k >= 0 && k < MAX_LENGTH);
      cost_tmp = distance_cost + manager->cost_cache_[k];

@ -498,7 +501,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
    const int end = position + (cost_cache_intervals[i].end_ > len
                                 ? len
                                 : cost_cache_intervals[i].end_);
-    const float cost = distance_cost + cost_cache_intervals[i].cost_;
+    const int64_t cost = distance_cost + cost_cache_intervals[i].cost_;

    for (; interval != NULL && interval->start_ < end;
         interval = interval_next) {
@ -576,7 +579,7 @@ static int BackwardReferencesHashChainDistanceOnly(
  const int pix_count = xsize * ysize;
  const int use_color_cache = (cache_bits > 0);
  const size_t literal_array_size =
-      sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
+      sizeof(*((CostModel*)NULL)->literal_) * VP8LHistogramNumCodes(cache_bits);
  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
  CostModel* const cost_model =
      (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
@ -584,13 +587,13 @@ static int BackwardReferencesHashChainDistanceOnly(
  CostManager* cost_manager =
      (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
  int offset_prev = -1, len_prev = -1;
-  float offset_cost = -1.f;
+  int64_t offset_cost = -1;
  int first_offset_is_constant = -1;  // initialized with 'impossible' value
  int reach = 0;

  if (cost_model == NULL || cost_manager == NULL) goto Error;

-  cost_model->literal_ = (float*)(cost_model + 1);
+  cost_model->literal_ = (uint32_t*)(cost_model + 1);
  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
@ -608,11 +611,12 @@ static int BackwardReferencesHashChainDistanceOnly(
  // non-processed locations from this point.
  dist_array[0] = 0;
  // Add first pixel as literal.
-  AddSingleLiteralWithCostModel(argb, &hashers, cost_model, 0, use_color_cache,
-                                0.f, cost_manager->costs_, dist_array);
+  AddSingleLiteralWithCostModel(argb, &hashers, cost_model, /*idx=*/0,
+                                use_color_cache, /*prev_cost=*/0,
+                                cost_manager->costs_, dist_array);

  for (i = 1; i < pix_count; ++i) {
-    const float prev_cost = cost_manager->costs_[i - 1];
+    const int64_t prev_cost = cost_manager->costs_[i - 1];
    int offset, len;
    VP8LHashChainFindCopy(hash_chain, i, &offset, &len);

--- a/src/enc/backward_references_enc.c
+++ b/src/enc/backward_references_enc.c
@ -13,8 +13,6 @@
 #include "src/enc/backward_references_enc.h"

 #include <assert.h>
-#include <float.h>
-#include <math.h>

 #include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
@ -27,8 +25,6 @@

 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references

-#define MAX_ENTROPY    (1e30f)
-
 // 1M window (4M bytes) minus 120 special codes for short distances.
 #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)

@ -758,7 +754,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
                                  int* const best_cache_bits) {
  int i;
  const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
-  float entropy_min = MAX_ENTROPY;
+  uint64_t entropy_min = WEBP_UINT64_MAX;
  int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
  VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
@ -843,7 +839,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
  }

  for (i = 0; i <= cache_bits_max; ++i) {
-    const float entropy = VP8LHistogramEstimateBits(histos[i]);
+    const uint64_t entropy = VP8LHistogramEstimateBits(histos[i]);
    if (i == 0 || entropy < entropy_min) {
      entropy_min = entropy;
      *best_cache_bits = i;
@ -920,7 +916,7 @@ static int GetBackwardReferences(int width, int height,
  int i, lz77_type;
  // Index 0 is for a color cache, index 1 for no cache (if needed).
  int lz77_types_best[2] = {0, 0};
-  float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
+  uint64_t bit_costs_best[2] = {WEBP_UINT64_MAX, WEBP_UINT64_MAX};
  VP8LHashChain hash_chain_box;
  VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
  int status = 0;
@ -932,7 +928,7 @@ static int GetBackwardReferences(int width, int height,
  for (lz77_type = 1; lz77_types_to_try;
       lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
    int res = 0;
-    float bit_cost = 0.f;
+    uint64_t bit_cost = 0u;
    if ((lz77_types_to_try & lz77_type) == 0) continue;
    switch (lz77_type) {
      case kLZ77RLE:
@ -1006,7 +1002,7 @@ static int GetBackwardReferences(int width, int height,
      const VP8LHashChain* const hash_chain_tmp =
          (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
      const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
-      float bit_cost_trace;
+      uint64_t bit_cost_trace;
      if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
                                                hash_chain_tmp, &refs[i],
                                                refs_tmp)) {
--- a/src/enc/config_enc.c
+++ b/src/enc/config_enc.c
@ -55,7 +55,6 @@ int WebPConfigInitInternal(WebPConfig* config,
  config->thread_level = 0;
  config->low_memory = 0;
  config->near_lossless = 100;
-  config->use_delta_palette = 0;
  config->use_sharp_yuv = 0;

  // TODO(skal): tune.
@ -125,9 +124,6 @@ int WebPValidateConfig(const WebPConfig* config) {
  if (config->thread_level < 0 || config->thread_level > 1) return 0;
  if (config->low_memory < 0 || config->low_memory > 1) return 0;
  if (config->exact < 0 || config->exact > 1) return 0;
-  if (config->use_delta_palette < 0 || config->use_delta_palette > 1) {
-    return 0;
-  }
  if (config->use_sharp_yuv < 0 || config->use_sharp_yuv > 1) return 0;

  return 1;
--- a/src/enc/cost_enc.c
+++ b/src/enc/cost_enc.c
@ -19,7 +19,7 @@
 // For each given level, the following table gives the pattern of contexts to
 // use for coding it (in [][0]) as well as the bit value to use for each
 // context (in [][1]).
-const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
+static const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
                  {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005},
  {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
  {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013},
--- a/src/enc/cost_enc.h
+++ b/src/enc/cost_enc.h
@ -61,7 +61,6 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
 }

 // Level cost calculations
-extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
 void VP8CalculateLevelCosts(VP8EncProba* const proba);
 static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
  return VP8LevelFixedCosts[level]
--- a/src/enc/histogram_enc.c
+++ b/src/enc/histogram_enc.c
@ -13,8 +13,7 @@
 #include "src/webp/config.h"
 #endif

-#include <float.h>
-#include <math.h>
+#include <string.h>

 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
@ -23,8 +22,6 @@
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"

-#define MAX_BIT_COST FLT_MAX
-
 // Number of partitions for the three dominant (literal, red and blue) symbol
 // costs.
 #define NUM_PARTITIONS 4
@ -33,10 +30,18 @@
 // Maximum number of histograms allowed in greedy combining algorithm.
 #define MAX_HISTO_GREEDY 100

+// Returns the number of bytes needed for one VP8LHistogram plus
+// VP8LHistogramNumCodes(cache_bits) literal entries.
+static int GetHistogramSize(int cache_bits) {
+  const int literal_size = VP8LHistogramNumCodes(cache_bits);
+  // Size the literal entries with their actual element type (uint32_t) rather
+  // than 'int', so the result stays correct where the two widths differ.
+  const size_t total_size =
+      sizeof(VP8LHistogram) + sizeof(uint32_t) * literal_size;
+  // The size is returned as an int, so it has to fit in 31 bits.
+  assert(total_size <= (size_t)0x7fffffff);
+  return (int)total_size;
+}
+
 static void HistogramClear(VP8LHistogram* const p) {
  uint32_t* const literal = p->literal_;
  const int cache_bits = p->palette_code_bits_;
-  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  const int histo_size = GetHistogramSize(cache_bits);
  memset(p, 0, histo_size);
  p->palette_code_bits_ = cache_bits;
  p->literal_ = literal;
@ -54,20 +59,13 @@ static void HistogramCopy(const VP8LHistogram* const src,
  uint32_t* const dst_literal = dst->literal_;
  const int dst_cache_bits = dst->palette_code_bits_;
  const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
-  const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
+  const int histo_size = GetHistogramSize(dst_cache_bits);
  assert(src->palette_code_bits_ == dst_cache_bits);
  memcpy(dst, src, histo_size);
  dst->literal_ = dst_literal;
  memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
 }

-int VP8LGetHistogramSize(int cache_bits) {
-  const int literal_size = VP8LHistogramNumCodes(cache_bits);
-  const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
-  assert(total_size <= (size_t)0x7fffffff);
-  return (int)total_size;
-}
-
+// Releases 'histo' by handing it to WebPSafeFree().
 void VP8LFreeHistogram(VP8LHistogram* const histo) {
   WebPSafeFree(histo);
 }
@ -102,17 +100,17 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
    HistogramClear(p);
  } else {
    p->trivial_symbol_ = 0;
-    p->bit_cost_ = 0.;
-    p->literal_cost_ = 0.;
-    p->red_cost_ = 0.;
-    p->blue_cost_ = 0.;
+    p->bit_cost_ = 0;
+    p->literal_cost_ = 0;
+    p->red_cost_ = 0;
+    p->blue_cost_ = 0;
    memset(p->is_used_, 0, sizeof(p->is_used_));
  }
 }

 VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
  VP8LHistogram* histo = NULL;
-  const int total_size = VP8LGetHistogramSize(cache_bits);
+  const int total_size = GetHistogramSize(cache_bits);
  uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
  if (memory == NULL) return NULL;
  histo = (VP8LHistogram*)memory;
@ -126,7 +124,7 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
 static void HistogramSetResetPointers(VP8LHistogramSet* const set,
                                      int cache_bits) {
  int i;
-  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  const int histo_size = GetHistogramSize(cache_bits);
  uint8_t* memory = (uint8_t*) (set->histograms);
  memory += set->max_size * sizeof(*set->histograms);
  for (i = 0; i < set->max_size; ++i) {
@ -140,7 +138,7 @@ static void HistogramSetResetPointers(VP8LHistogramSet* const set,

 // Returns the total size of the VP8LHistogramSet.
+// That is the set header plus, for each of the 'size' histograms, one pointer
+// slot, the per-histogram byte size for 'cache_bits' and WEBP_ALIGN_CST bytes
+// (presumably alignment slack -- TODO confirm).
 static size_t HistogramSetTotalSize(int size, int cache_bits) {
-  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  const int histo_size = GetHistogramSize(cache_bits);
   return (sizeof(VP8LHistogramSet) + size * (sizeof(VP8LHistogram*) +
           histo_size + WEBP_ALIGN_CST));
 }
@ -230,8 +228,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
 // -----------------------------------------------------------------------------
 // Entropy-related functions.

-static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
-  float mix;
+static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {
+  uint64_t mix;
  if (entropy->nonzeros < 5) {
    if (entropy->nonzeros <= 1) {
      return 0;
@ -240,67 +238,72 @@ static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
    // Let's mix in a bit of entropy to favor good clustering when
    // distributions of these are combined.
    if (entropy->nonzeros == 2) {
-      return 0.99f * entropy->sum + 0.01f * entropy->entropy;
+      return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) +
+                          entropy->entropy,
+                      100);
    }
    // No matter what the entropy says, we cannot be better than min_limit
    // with Huffman coding. I am mixing a bit of entropy into the
    // min_limit since it produces much better (~0.5 %) compression results
    // perhaps because of better entropy clustering.
    if (entropy->nonzeros == 3) {
-      mix = 0.95f;
+      mix = 950;
    } else {
-      mix = 0.7f;  // nonzeros == 4.
+      mix = 700;  // nonzeros == 4.
    }
  } else {
-    mix = 0.627f;
+    mix = 627;
  }

  {
-    float min_limit = 2.f * entropy->sum - entropy->max_val;
-    min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
+    uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val)
+                         << LOG_2_PRECISION_BITS;
+    min_limit =
+        DivRound(mix * min_limit + (1000 - mix) * entropy->entropy, 1000);
    return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
  }
 }

-float VP8LBitsEntropy(const uint32_t* const array, int n) {
+uint64_t VP8LBitsEntropy(const uint32_t* const array, int n) {
  VP8LBitEntropy entropy;
  VP8LBitsEntropyUnrefined(array, n, &entropy);

  return BitsEntropyRefine(&entropy);
 }

-static float InitialHuffmanCost(void) {
+static uint64_t InitialHuffmanCost(void) {
  // Small bias because Huffman code length is typically not stored in
  // full length.
-  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
-  static const float kSmallBias = 9.1f;
-  return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
+  static const uint64_t kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
+  // Subtract a bias of 9.1.
+  return (kHuffmanCodeOfHuffmanCodeSize << LOG_2_PRECISION_BITS) -
+         DivRound(91ll << LOG_2_PRECISION_BITS, 10);
 }

 // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
-static float FinalHuffmanCost(const VP8LStreaks* const stats) {
-  // The constants in this function are experimental and got rounded from
+static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) {
+  // The constants in this function are empirical and got rounded from
  // their original values in 1/8 when switched to 1/1024.
-  float retval = InitialHuffmanCost();
+  uint64_t retval = InitialHuffmanCost();
  // Second coefficient: Many zeros in the histogram are covered efficiently
  // by a run-length encode. Originally 2/8.
-  retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
+  uint64_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1];
  // Second coefficient: Constant values are encoded less efficiently, but still
  // RLE'ed. Originally 6/8.
-  retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
+  retval_extra += stats->counts[1] * 2640 + 720 * stats->streaks[1][1];
  // 0s are usually encoded more efficiently than non-0s.
  // Originally 15/8.
-  retval += 1.796875f * stats->streaks[0][0];
+  retval_extra += 1840 * stats->streaks[0][0];
  // Originally 26/8.
-  retval += 3.28125f * stats->streaks[1][0];
-  return retval;
+  retval_extra += 3360 * stats->streaks[1][0];
+  return retval + (retval_extra << (LOG_2_PRECISION_BITS - 10));
 }

 // Get the symbol entropy for the distribution 'population'.
 // Set 'trivial_sym', if there's only one symbol present in the distribution.
-static float PopulationCost(const uint32_t* const population, int length,
-                            uint32_t* const trivial_sym,
-                            uint8_t* const is_used) {
+static uint64_t PopulationCost(const uint32_t* const population, int length,
+                               uint32_t* const trivial_sym,
+                               uint8_t* const is_used) {
  VP8LBitEntropy bit_entropy;
  VP8LStreaks stats;
  VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@ -316,10 +319,11 @@ static float PopulationCost(const uint32_t* const population, int length,

 // trivial_at_end is 1 if the two histograms only have one element that is
 // non-zero: both the zero-th one, or both the last one.
-static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
-                                            const uint32_t* const Y, int length,
-                                            int is_X_used, int is_Y_used,
-                                            int trivial_at_end) {
+static WEBP_INLINE uint64_t GetCombinedEntropy(const uint32_t* const X,
+                                               const uint32_t* const Y,
+                                               int length, int is_X_used,
+                                               int is_Y_used,
+                                               int trivial_at_end) {
  VP8LStreaks stats;
  if (trivial_at_end) {
    // This configuration is due to palettization that transforms an indexed
@ -357,7 +361,7 @@ static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
 }

 // Estimates the Entropy + Huffman + other block overhead size cost.
-float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
+uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p) {
  return PopulationCost(p->literal_,
                        VP8LHistogramNumCodes(p->palette_code_bits_), NULL,
                        &p->is_used_[0]) +
@ -366,27 +370,42 @@ float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
         PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) +
         PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL,
                        &p->is_used_[4]) +
-         (float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
-                              NUM_LENGTH_CODES) +
-         (float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
+         ((uint64_t)(VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
+                                   NUM_LENGTH_CODES) +
+                     VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES))
+          << LOG_2_PRECISION_BITS);
 }

 // -----------------------------------------------------------------------------
 // Various histogram combine/cost-eval functions

-static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
-                                       const VP8LHistogram* const b,
-                                       float cost_threshold, float* cost) {
+// Store a + b in *b, saturating at WEBP_INT64_MAX.
+static WEBP_INLINE void SaturateAdd(uint64_t a, int64_t* b) {
+  if (*b < 0 || (int64_t)a <= WEBP_INT64_MAX - *b) {
+    *b += (int64_t)a;
+  } else {
+    *b = WEBP_INT64_MAX;
+  }
+}
+
+// Returns 1 if the cost of the combined histogram is less than the threshold.
+// Otherwise returns 0 and the cost is invalid due to early bail-out.
+WEBP_NODISCARD static int GetCombinedHistogramEntropy(
+    const VP8LHistogram* const a, const VP8LHistogram* const b,
+    int64_t cost_threshold_in, uint64_t* cost) {
  const int palette_code_bits = a->palette_code_bits_;
  int trivial_at_end = 0;
+  const uint64_t cost_threshold = (uint64_t)cost_threshold_in;
  assert(a->palette_code_bits_ == b->palette_code_bits_);
-  *cost += GetCombinedEntropy(a->literal_, b->literal_,
-                              VP8LHistogramNumCodes(palette_code_bits),
-                              a->is_used_[0], b->is_used_[0], 0);
-  *cost += (float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
-                                        b->literal_ + NUM_LITERAL_CODES,
-                                        NUM_LENGTH_CODES);
-  if (*cost > cost_threshold) return 0;
+  if (cost_threshold_in <= 0) return 0;
+  *cost = GetCombinedEntropy(a->literal_, b->literal_,
+                             VP8LHistogramNumCodes(palette_code_bits),
+                             a->is_used_[0], b->is_used_[0], 0);
+  *cost += (uint64_t)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
+                                           b->literal_ + NUM_LITERAL_CODES,
+                                           NUM_LENGTH_CODES)
+           << LOG_2_PRECISION_BITS;
+  if (*cost >= cost_threshold) return 0;

  if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&
      a->trivial_symbol_ == b->trivial_symbol_) {
@ -401,27 +420,24 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
    }
  }

-  *cost +=
-      GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
-                         b->is_used_[1], trivial_at_end);
-  if (*cost > cost_threshold) return 0;
+  *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES,
+                              a->is_used_[1], b->is_used_[1], trivial_at_end);
+  if (*cost >= cost_threshold) return 0;

-  *cost +=
-      GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
-                         b->is_used_[2], trivial_at_end);
-  if (*cost > cost_threshold) return 0;
+  *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES,
+                              a->is_used_[2], b->is_used_[2], trivial_at_end);
+  if (*cost >= cost_threshold) return 0;

-  *cost +=
-      GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
-                         a->is_used_[3], b->is_used_[3], trivial_at_end);
-  if (*cost > cost_threshold) return 0;
+  *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
+                              a->is_used_[3], b->is_used_[3], trivial_at_end);
+  if (*cost >= cost_threshold) return 0;

-  *cost +=
-      GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
-                         a->is_used_[4], b->is_used_[4], 0);
-  *cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_,
-                                        NUM_DISTANCE_CODES);
-  if (*cost > cost_threshold) return 0;
+  *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
+                              a->is_used_[4], b->is_used_[4], 0);
+  *cost += (uint64_t)VP8LExtraCostCombined(a->distance_, b->distance_,
+                                           NUM_DISTANCE_CODES)
+           << LOG_2_PRECISION_BITS;
+  if (*cost >= cost_threshold) return 0;

  return 1;
 }
@ -441,33 +457,39 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
 // Since the previous score passed is 'cost_threshold', we only need to compare
 // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
 // early.
-static float HistogramAddEval(const VP8LHistogram* const a,
-                              const VP8LHistogram* const b,
-                              VP8LHistogram* const out, float cost_threshold) {
-  float cost = 0;
-  const float sum_cost = a->bit_cost_ + b->bit_cost_;
-  cost_threshold += sum_cost;
+// Returns 1 if the cost is less than the threshold.
+// Otherwise returns 0 and the cost is invalid due to early bail-out.
+WEBP_NODISCARD static int HistogramAddEval(const VP8LHistogram* const a,
+                                           const VP8LHistogram* const b,
+                                           VP8LHistogram* const out,
+                                           int64_t cost_threshold) {
+  uint64_t cost;
+  const uint64_t sum_cost = a->bit_cost_ + b->bit_cost_;
+  SaturateAdd(sum_cost, &cost_threshold);
+  if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0;

-  if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
-    HistogramAdd(a, b, out);
-    out->bit_cost_ = cost;
-    out->palette_code_bits_ = a->palette_code_bits_;
-  }
-
-  return cost - sum_cost;
+  HistogramAdd(a, b, out);
+  out->bit_cost_ = cost;
+  out->palette_code_bits_ = a->palette_code_bits_;
+  return 1;
 }

 // Same as HistogramAddEval(), except that the resulting histogram
 // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
 // the term C(b) which is constant over all the evaluations.
-static float HistogramAddThresh(const VP8LHistogram* const a,
-                                const VP8LHistogram* const b,
-                                float cost_threshold) {
-  float cost;
+// Returns 1 if the cost is less than the threshold.
+// Otherwise returns 0 and the cost is invalid due to early bail-out.
+WEBP_NODISCARD static int HistogramAddThresh(const VP8LHistogram* const a,
+                                             const VP8LHistogram* const b,
+                                             int64_t cost_threshold,
+                                             int64_t* cost_out) {
+  uint64_t cost;
  assert(a != NULL && b != NULL);
-  cost = -a->bit_cost_;
-  GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
-  return cost;
+  SaturateAdd(a->bit_cost_, &cost_threshold);
+  if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0;
+
+  *cost_out = (int64_t)cost - (int64_t)a->bit_cost_;
+  return 1;
 }

 // -----------------------------------------------------------------------------
@ -475,21 +497,21 @@ static float HistogramAddThresh(const VP8LHistogram* const a,
 // The structure to keep track of cost range for the three dominant entropy
 // symbols.
 typedef struct {
-  float literal_max_;
-  float literal_min_;
-  float red_max_;
-  float red_min_;
-  float blue_max_;
-  float blue_min_;
+  uint64_t literal_max_;
+  uint64_t literal_min_;
+  uint64_t red_max_;
+  uint64_t red_min_;
+  uint64_t blue_max_;
+  uint64_t blue_min_;
 } DominantCostRange;

 static void DominantCostRangeInit(DominantCostRange* const c) {
-  c->literal_max_ = 0.;
-  c->literal_min_ = MAX_BIT_COST;
-  c->red_max_ = 0.;
-  c->red_min_ = MAX_BIT_COST;
-  c->blue_max_ = 0.;
-  c->blue_min_ = MAX_BIT_COST;
+  c->literal_max_ = 0;
+  c->literal_min_ = WEBP_UINT64_MAX;
+  c->red_max_ = 0;
+  c->red_min_ = WEBP_UINT64_MAX;
+  c->blue_max_ = 0;
+  c->blue_min_ = WEBP_UINT64_MAX;
 }

 static void UpdateDominantCostRange(
@ -504,15 +526,18 @@ static void UpdateDominantCostRange(

 static void UpdateHistogramCost(VP8LHistogram* const h) {
  uint32_t alpha_sym, red_sym, blue_sym;
-  const float alpha_cost =
+  const uint64_t alpha_cost =
      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
-  const float distance_cost =
+  const uint64_t distance_cost =
      PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
-      (float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
+      ((uint64_t)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES)
+       << LOG_2_PRECISION_BITS);
  const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
  h->literal_cost_ =
      PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
-      (float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+      ((uint64_t)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
+                               NUM_LENGTH_CODES)
+       << LOG_2_PRECISION_BITS);
  h->red_cost_ =
      PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
  h->blue_cost_ =
@ -527,10 +552,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
  }
 }

-static int GetBinIdForEntropy(float min, float max, float val) {
-  const float range = max - min;
-  if (range > 0.) {
-    const float delta = val - min;
+static int GetBinIdForEntropy(uint64_t min, uint64_t max, uint64_t val) {
+  const uint64_t range = max - min;
+  if (range > 0) {
+    const uint64_t delta = val - min;
    return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
  } else {
    return 0;
@ -639,11 +664,12 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,

 // Merges some histograms with same bin_id together if it's advantageous.
 // Sets the remaining histograms to NULL.
+// 'combine_cost_factor' has to be divided by 100.
 static void HistogramCombineEntropyBin(
    VP8LHistogramSet* const image_histo, int* num_used,
    const uint16_t* const clusters, uint16_t* const cluster_mappings,
    VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
-    float combine_cost_factor, int low_effort) {
+    int32_t combine_cost_factor, int low_effort) {
  VP8LHistogram** const histograms = image_histo->histograms;
  int idx;
  struct {
@ -673,11 +699,11 @@ static void HistogramCombineEntropyBin(
      cluster_mappings[clusters[idx]] = clusters[first];
    } else {
      // try to merge #idx into #first (both share the same bin_id)
-      const float bit_cost = histograms[idx]->bit_cost_;
-      const float bit_cost_thresh = -bit_cost * combine_cost_factor;
-      const float curr_cost_diff = HistogramAddEval(
-          histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
-      if (curr_cost_diff < bit_cost_thresh) {
+      const uint64_t bit_cost = histograms[idx]->bit_cost_;
+      const int64_t bit_cost_thresh =
+          -DivRound((int64_t)bit_cost * combine_cost_factor, 100);
+      if (HistogramAddEval(histograms[first], histograms[idx], cur_combo,
+                           bit_cost_thresh)) {
        // Try to merge two histograms only if the combo is a trivial one or
        // the two candidate histograms are already non-trivial.
        // For some images, 'try_combine' turns out to be false for a lot of
@ -724,8 +750,8 @@ static uint32_t MyRand(uint32_t* const seed) {
 typedef struct {
  int idx1;
  int idx2;
-  float cost_diff;
-  float cost_combo;
+  int64_t cost_diff;
+  uint64_t cost_combo;
 } HistogramPair;

 typedef struct {
@ -765,7 +791,7 @@ static void HistoQueuePopPair(HistoQueue* const histo_queue,
 // Check whether a pair in the queue should be updated as head or not.
 static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
                                 HistogramPair* const pair) {
-  assert(pair->cost_diff < 0.);
+  assert(pair->cost_diff < 0);
  assert(pair >= histo_queue->queue &&
         pair < (histo_queue->queue + histo_queue->size));
  assert(histo_queue->size > 0);
@ -778,29 +804,35 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
 }

 // Update the cost diff and combo of a pair of histograms. This needs to be
-// called when the the histograms have been merged with a third one.
-static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
-                                 const VP8LHistogram* const h2, float threshold,
-                                 HistogramPair* const pair) {
-  const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
-  pair->cost_combo = 0.;
-  GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
-  pair->cost_diff = pair->cost_combo - sum_cost;
+// called when the histograms have been merged with a third one.
+// Returns 1 if the cost diff is less than the threshold.
+// Otherwise returns 0 and the cost is invalid due to early bail-out.
+WEBP_NODISCARD static int HistoQueueUpdatePair(const VP8LHistogram* const h1,
+                                               const VP8LHistogram* const h2,
+                                               int64_t cost_threshold,
+                                               HistogramPair* const pair) {
+  const int64_t sum_cost = h1->bit_cost_ + h2->bit_cost_;
+  SaturateAdd(sum_cost, &cost_threshold);
+  if (!GetCombinedHistogramEntropy(h1, h2, cost_threshold, &pair->cost_combo)) {
+    return 0;
+  }
+  pair->cost_diff = (int64_t)pair->cost_combo - sum_cost;
+  return 1;
 }

 // Create a pair from indices "idx1" and "idx2" provided its cost
 // is inferior to "threshold", a negative entropy.
-// It returns the cost of the pair, or 0. if it superior to threshold.
-static float HistoQueuePush(HistoQueue* const histo_queue,
-                            VP8LHistogram** const histograms, int idx1,
-                            int idx2, float threshold) {
+// It returns the cost of the pair, or 0 if it is superior to threshold.
+static int64_t HistoQueuePush(HistoQueue* const histo_queue,
+                              VP8LHistogram** const histograms, int idx1,
+                              int idx2, int64_t threshold) {
  const VP8LHistogram* h1;
  const VP8LHistogram* h2;
  HistogramPair pair;

  // Stop here if the queue is full.
-  if (histo_queue->size == histo_queue->max_size) return 0.;
-  assert(threshold <= 0.);
+  if (histo_queue->size == histo_queue->max_size) return 0;
+  assert(threshold <= 0);
  if (idx1 > idx2) {
    const int tmp = idx2;
    idx2 = idx1;
@ -811,10 +843,8 @@ static float HistoQueuePush(HistoQueue* const histo_queue,
  h1 = histograms[idx1];
  h2 = histograms[idx2];

-  HistoQueueUpdatePair(h1, h2, threshold, &pair);
-
  // Do not even consider the pair if it does not improve the entropy.
-  if (pair.cost_diff >= threshold) return 0.;
+  if (!HistoQueueUpdatePair(h1, h2, threshold, &pair)) return 0;

  histo_queue->queue[histo_queue->size++] = pair;
  HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);
@ -851,7 +881,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
    for (j = i + 1; j < image_histo_size; ++j) {
      // Initialize queue.
      if (image_histo->histograms[j] == NULL) continue;
-      HistoQueuePush(&histo_queue, histograms, i, j, 0.);
+      HistoQueuePush(&histo_queue, histograms, i, j, 0);
    }
  }

@ -879,7 +909,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
    // Push new pairs formed with combined histogram to the queue.
    for (i = 0; i < image_histo->size; ++i) {
      if (i == idx1 || image_histo->histograms[i] == NULL) continue;
-      HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0.);
+      HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0);
    }
  }

@ -937,8 +967,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
           ++tries_with_no_success < num_tries_no_success;
       ++iter) {
    int* mapping_index;
-    float best_cost =
-        (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
+    int64_t best_cost =
+        (histo_queue.size == 0) ? 0 : histo_queue.queue[0].cost_diff;
    int best_idx1 = -1, best_idx2 = 1;
    const uint32_t rand_range = (*num_used - 1) * (*num_used);
    // (*num_used) / 2 was chosen empirically. Less means faster but worse
@ -947,7 +977,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,

    // Pick random samples.
    for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
-      float curr_cost;
+      int64_t curr_cost;
      // Choose two different histograms at random and try to combine them.
      const uint32_t tmp = MyRand(&seed) % rand_range;
      uint32_t idx1 = tmp / (*num_used - 1);
@ -1012,8 +1042,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
      }
      if (do_eval) {
        // Re-evaluate the cost of an updated pair.
-        HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0., p);
-        if (p->cost_diff >= 0.) {
+        if (!HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0,
+                                  p)) {
          HistoQueuePopPair(&histo_queue, p);
          continue;
        }
@ -1049,7 +1079,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
  if (out_size > 1) {
    for (i = 0; i < in_size; ++i) {
      int best_out = 0;
-      float best_bits = MAX_BIT_COST;
+      int64_t best_bits = WEBP_INT64_MAX;
      int k;
      if (in_histo[i] == NULL) {
        // Arbitrarily set to the previous value if unused to help future LZ77.
@ -1057,9 +1087,9 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
        continue;
      }
      for (k = 0; k < out_size; ++k) {
-        float cur_bits;
-        cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
-        if (k == 0 || cur_bits < best_bits) {
+        int64_t cur_bits;
+        if (HistogramAddThresh(out_histo[k], in_histo[i], best_bits,
+                               &cur_bits)) {
          best_bits = cur_bits;
          best_out = k;
        }
@ -1085,13 +1115,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
  }
 }

-static float GetCombineCostFactor(int histo_size, int quality) {
-  float combine_cost_factor = 0.16f;
+static int32_t GetCombineCostFactor(int histo_size, int quality) {
+  int32_t combine_cost_factor = 16;
  if (quality < 90) {
-    if (histo_size > 256) combine_cost_factor /= 2.f;
-    if (histo_size > 512) combine_cost_factor /= 2.f;
-    if (histo_size > 1024) combine_cost_factor /= 2.f;
-    if (quality <= 50) combine_cost_factor /= 2.f;
+    if (histo_size > 256) combine_cost_factor /= 2;
+    if (histo_size > 512) combine_cost_factor /= 2;
+    if (histo_size > 1024) combine_cost_factor /= 2;
+    if (quality <= 50) combine_cost_factor /= 2;
  }
  return combine_cost_factor;
 }
@ -1181,7 +1211,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
  const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
  int entropy_combine;
  uint16_t* const map_tmp =
-      WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
+      (uint16_t*)WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
  uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
  int num_used = image_histo_raw_size;
  if (orig_histo == NULL || map_tmp == NULL) {
@ -1201,7 +1231,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,

  if (entropy_combine) {
    uint16_t* const bin_map = map_tmp;
-    const float combine_cost_factor =
+    const int32_t combine_cost_factor =
        GetCombineCostFactor(image_histo_raw_size, quality);
    const uint32_t num_clusters = num_used;

--- a/src/enc/histogram_enc.h
+++ b/src/enc/histogram_enc.h
@ -40,10 +40,10 @@ typedef struct {
  int palette_code_bits_;
  uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
                             // literal symbols are single valued.
-  float bit_cost_;           // cached value of bit cost.
-  float literal_cost_;       // Cached values of dominant entropy costs:
-  float red_cost_;           // literal, red & blue.
-  float blue_cost_;
+  uint64_t bit_cost_;        // cached value of bit cost.
+  uint64_t literal_cost_;    // Cached values of dominant entropy costs:
+  uint64_t red_cost_;        // literal, red & blue.
+  uint64_t blue_cost_;
  uint8_t is_used_[5];       // 5 for literal, red, blue, alpha, distance
 } VP8LHistogram;

@ -64,9 +64,6 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
                         const VP8LBackwardRefs* const refs,
                         int palette_code_bits);

-// Return the size of the histogram for a given cache_bits.
-int VP8LGetHistogramSize(int cache_bits);
-
 // Set the palette_code_bits and reset the stats.
 // If init_arrays is true, the arrays are also filled with 0's.
 void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
@ -117,11 +114,11 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
                             int* const percent);

 // Returns the entropy for the symbols in the input array.
-float VP8LBitsEntropy(const uint32_t* const array, int n);
+uint64_t VP8LBitsEntropy(const uint32_t* const array, int n);

 // Estimate how many bits the combined entropy of literals and distance
 // approximately maps to.
-float VP8LHistogramEstimateBits(VP8LHistogram* const p);
+uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p);

 #ifdef __cplusplus
 }
--- a/src/enc/iterator_enc.c
+++ b/src/enc/iterator_enc.c
@ -54,7 +54,8 @@ void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
  InitLeft(it);
 }

-void VP8IteratorReset(VP8EncIterator* const it) {
+// restart a scan
+static void VP8IteratorReset(VP8EncIterator* const it) {
  VP8Encoder* const enc = it->enc_;
  VP8IteratorSetRow(it, 0);
  VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_);  // default
--- a/src/enc/predictor_enc.c
+++ b/src/enc/predictor_enc.c
@ -14,13 +14,15 @@
 //          Urvang Joshi (urvang@google.com)
 //          Vincent Rabaud (vrabaud@google.com)

+#include <string.h>
+
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/enc/vp8li_enc.h"

 #define MAX_DIFF_COST (1e30f)
-
+#define HISTO_SIZE (4 * 256)
 static const float kSpatialPredictorBias = 15.f;
 static const int kPredLowEffort = 11;
 static const uint32_t kMaskAlpha = 0xff000000;
@ -31,8 +33,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
 //------------------------------------------------------------------------------
 // Methods to calculate Entropy (Shannon).

-static float PredictionCostSpatial(const int counts[256], int weight_0,
-                                   float exp_val) {
+// Compute a bias for prediction entropy using a global heuristic to favor
+// values closer to 0. Hence the final negative sign.
+static float PredictionCostBias(const uint32_t counts[256], int weight_0,
+                                float exp_val) {
  const int significant_symbols = 256 >> 4;
  const float exp_decay_factor = 0.6f;
  float bits = (float)weight_0 * counts[0];
@ -44,23 +48,33 @@ static float PredictionCostSpatial(const int counts[256], int weight_0,
  return (float)(-0.1 * bits);
 }

-static float PredictionCostSpatialHistogram(const int accumulated[4][256],
-                                            const int tile[4][256]) {
+static float PredictionCostSpatialHistogram(
+    const uint32_t accumulated[HISTO_SIZE], const uint32_t tile[HISTO_SIZE],
+    int mode, int left_mode, int above_mode) {
  int i;
  float retval = 0.f;
  for (i = 0; i < 4; ++i) {
    const float kExpValue = 0.94f;
-    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
-    retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
+    retval += PredictionCostBias(&tile[i * 256], 1, kExpValue);
+    // Compute the new cost if 'tile' is added to 'accumulated' but also add the
+    // cost of the current histogram to guide the spatial predictor selection.
+    // Basically, favor low entropy, locally and globally.
+    retval += (float)VP8LCombinedShannonEntropy(&tile[i * 256],
+                                                &accumulated[i * 256]) /
+              (1ll << LOG_2_PRECISION_BITS);
  }
-  return (float)retval;
+  // Favor keeping the areas locally similar.
+  if (mode == left_mode) retval -= kSpatialPredictorBias;
+  if (mode == above_mode) retval -= kSpatialPredictorBias;
+  return retval;
 }

-static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
-  ++histo_argb[0][argb >> 24];
-  ++histo_argb[1][(argb >> 16) & 0xff];
-  ++histo_argb[2][(argb >> 8) & 0xff];
-  ++histo_argb[3][argb & 0xff];
+static WEBP_INLINE void UpdateHisto(uint32_t histo_argb[HISTO_SIZE],
+                                    uint32_t argb) {
+  ++histo_argb[0 * 256 + (argb >> 24)];
+  ++histo_argb[1 * 256 + ((argb >> 16) & 0xff)];
+  ++histo_argb[2 * 256 + ((argb >> 8) & 0xff)];
+  ++histo_argb[3 * 256 + (argb & 0xff)];
 }

 //------------------------------------------------------------------------------
@ -296,14 +310,11 @@ static WEBP_INLINE void GetResidual(
 // applied, quantizing residuals to multiples of quantization levels up to
 // max_quantization (the actual quantization level depends on smoothness near
 // the given pixel).
-static int GetBestPredictorForTile(int width, int height,
-                                   int tile_x, int tile_y, int bits,
-                                   int accumulated[4][256],
-                                   uint32_t* const argb_scratch,
-                                   const uint32_t* const argb,
-                                   int max_quantization,
-                                   int exact, int used_subtract_green,
-                                   const uint32_t* const modes) {
+static int GetBestPredictorForTile(
+    int width, int height, int tile_x, int tile_y, int bits,
+    uint32_t accumulated[HISTO_SIZE], uint32_t* const argb_scratch,
+    const uint32_t* const argb, int max_quantization, int exact,
+    int used_subtract_green, const uint32_t* const modes) {
  const int kNumPredModes = 14;
  const int start_x = tile_x << bits;
  const int start_y = tile_y << bits;
@ -333,12 +344,11 @@ static int GetBestPredictorForTile(int width, int height,
  float best_diff = MAX_DIFF_COST;
  int best_mode = 0;
  int mode;
-  int histo_stack_1[4][256];
-  int histo_stack_2[4][256];
+  uint32_t histo_stack_1[HISTO_SIZE];
+  uint32_t histo_stack_2[HISTO_SIZE];
  // Need pointers to be able to swap arrays.
-  int (*histo_argb)[256] = histo_stack_1;
-  int (*best_histo)[256] = histo_stack_2;
-  int i, j;
+  uint32_t* histo_argb = histo_stack_1;
+  uint32_t* best_histo = histo_stack_2;
  uint32_t residuals[1 << MAX_TRANSFORM_BITS];
  assert(bits <= MAX_TRANSFORM_BITS);
  assert(max_x <= (1 << MAX_TRANSFORM_BITS));
@ -383,14 +393,11 @@ static int GetBestPredictorForTile(int width, int height,
        UpdateHisto(histo_argb, residuals[relative_x]);
      }
    }
-    cur_diff = PredictionCostSpatialHistogram(
-        (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
-    // Favor keeping the areas locally similar.
-    if (mode == left_mode) cur_diff -= kSpatialPredictorBias;
-    if (mode == above_mode) cur_diff -= kSpatialPredictorBias;
+    cur_diff = PredictionCostSpatialHistogram(accumulated, histo_argb, mode,
+                                              left_mode, above_mode);

    if (cur_diff < best_diff) {
-      int (*tmp)[256] = histo_argb;
+      uint32_t* tmp = histo_argb;
      histo_argb = best_histo;
      best_histo = tmp;
      best_diff = cur_diff;
@ -398,12 +405,7 @@ static int GetBestPredictorForTile(int width, int height,
    }
  }

-  for (i = 0; i < 4; i++) {
-    for (j = 0; j < 256; j++) {
-      accumulated[i][j] += best_histo[i][j];
-    }
-  }
-
+  VP8LAddVectorEq(best_histo, accumulated, HISTO_SIZE);
  return best_mode;
 }

@ -411,12 +413,12 @@ static int GetBestPredictorForTile(int width, int height,
 // If max_quantization > 1, applies near lossless processing, quantizing
 // residuals to multiples of quantization levels up to max_quantization
 // (the actual quantization level depends on smoothness near the given pixel).
-static void CopyImageWithPrediction(int width, int height,
-                                    int bits, uint32_t* const modes,
+static void CopyImageWithPrediction(int width, int height, int bits,
+                                    const uint32_t* const modes,
                                    uint32_t* const argb_scratch,
-                                    uint32_t* const argb,
-                                    int low_effort, int max_quantization,
-                                    int exact, int used_subtract_green) {
+                                    uint32_t* const argb, int low_effort,
+                                    int max_quantization, int exact,
+                                    int used_subtract_green) {
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
  // The width of upper_row and current_row is one pixel larger than image width
  // to allow the top right pixel to point to the leftmost pixel of the next row
@ -469,6 +471,71 @@ static void CopyImageWithPrediction(int width, int height,
  }
 }

+// Subsamples 'image' in place by the biggest power of 2 squares of uniform
+// value it is made out of; the resulting bits are stored in 'best_bits_out'.
+static void OptimizeSampling(uint32_t* const image, int full_width,
+                             int full_height, int bits, int* best_bits_out) {
+  int width = VP8LSubSampleSize(full_width, bits);
+  int height = VP8LSubSampleSize(full_height, bits);
+  int old_width, x, y, square_size;
+  int best_bits = bits;
+  *best_bits_out = bits;
+  // Check rows first.
+  while (best_bits < MAX_TRANSFORM_BITS) {
+    const int new_square_size = 1 << (best_bits + 1 - bits);
+    int is_good = 1;
+    square_size = 1 << (best_bits - bits);
+    for (y = 0; y + square_size < height; y += new_square_size) {
+      // Check the first lines of consecutive line groups.
+      if (memcmp(&image[y * width], &image[(y + square_size) * width],
+                 width * sizeof(*image)) != 0) {
+        is_good = 0;
+        break;
+      }
+    }
+    if (is_good) {
+      ++best_bits;
+    } else {
+      break;
+    }
+  }
+  if (best_bits == bits) return;
+
+  // Check columns.
+  while (best_bits > bits) {
+    int is_good = 1;
+    square_size = 1 << (best_bits - bits);
+    for (y = 0; is_good && y < height; ++y) {
+      for (x = 0; is_good && x < width; x += square_size) {
+        int i;
+        for (i = x + 1; i < GetMin(x + square_size, width); ++i) {
+          if (image[y * width + i] != image[y * width + x]) {
+            is_good = 0;
+            break;
+          }
+        }
+      }
+    }
+    if (is_good) {
+      break;
+    }
+    --best_bits;
+  }
+  if (best_bits == bits) return;
+
+  // Subsample the image.
+  old_width = width;
+  square_size = 1 << (best_bits - bits);
+  width = VP8LSubSampleSize(full_width, best_bits);
+  height = VP8LSubSampleSize(full_height, best_bits);
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      image[y * width + x] = image[square_size * (y * old_width + x)];
+    }
+  }
+  *best_bits_out = best_bits;
+}
+
 // Finds the best predictor for each tile, and converts the image to residuals
 // with respect to predictions. If near_lossless_quality < 100, applies
 // near lossless processing, shaving off more bits of residuals for lower
@ -478,20 +545,20 @@ int VP8LResidualImage(int width, int height, int bits, int low_effort,
                      uint32_t* const image, int near_lossless_quality,
                      int exact, int used_subtract_green,
                      const WebPPicture* const pic, int percent_range,
-                      int* const percent) {
+                      int* const percent, int* const best_bits) {
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
  const int tiles_per_col = VP8LSubSampleSize(height, bits);
  int percent_start = *percent;
-  int tile_y;
-  int histo[4][256];
  const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
  if (low_effort) {
    int i;
    for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
      image[i] = ARGB_BLACK | (kPredLowEffort << 8);
    }
+    *best_bits = bits;
  } else {
-    memset(histo, 0, sizeof(histo));
+    int tile_y;
+    uint32_t histo[HISTO_SIZE] = { 0 };
    for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
      int tile_x;
      for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
@ -507,9 +574,10 @@ int VP8LResidualImage(int width, int height, int bits, int low_effort,
        return 0;
      }
    }
+    OptimizeSampling(image, width, height, bits, best_bits);
  }

-  CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
+  CopyImageWithPrediction(width, height, *best_bits, image, argb_scratch, argb,
                          low_effort, max_quantization, exact,
                          used_subtract_green);
  return WebPReportProgress(pic, percent_start + percent_range, percent);
@ -539,20 +607,21 @@ static WEBP_INLINE uint32_t MultipliersToColorCode(
         m->green_to_red_;
 }

-static float PredictionCostCrossColor(const int accumulated[256],
-                                      const int counts[256]) {
+static float PredictionCostCrossColor(const uint32_t accumulated[256],
+                                      const uint32_t counts[256]) {
  // Favor low entropy, locally and globally.
-  // Favor small absolute values for PredictionCostSpatial
+  // Favor small absolute values for PredictionCostBias
  static const float kExpValue = 2.4f;
-  return VP8LCombinedShannonEntropy(counts, accumulated) +
-         PredictionCostSpatial(counts, 3, kExpValue);
+  return (float)VP8LCombinedShannonEntropy(counts, accumulated) /
+             (1ll << LOG_2_PRECISION_BITS) +
+         PredictionCostBias(counts, 3, kExpValue);
 }

 static float GetPredictionCostCrossColorRed(
    const uint32_t* argb, int stride, int tile_width, int tile_height,
    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
-    const int accumulated_red_histo[256]) {
-  int histo[256] = { 0 };
+    const uint32_t accumulated_red_histo[256]) {
+  uint32_t histo[256] = { 0 };
  float cur_diff;

  VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
@ -571,10 +640,11 @@ static float GetPredictionCostCrossColorRed(
  return cur_diff;
 }

-static void GetBestGreenToRed(
-    const uint32_t* argb, int stride, int tile_width, int tile_height,
-    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
-    const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
+static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
+                              int tile_height, VP8LMultipliers prev_x,
+                              VP8LMultipliers prev_y, int quality,
+                              const uint32_t accumulated_red_histo[256],
+                              VP8LMultipliers* const best_tx) {
  const int kMaxIters = 4 + ((7 * quality) >> 8);  // in range [4..6]
  int green_to_red_best = 0;
  int iter, offset;
@ -603,9 +673,9 @@ static void GetBestGreenToRed(

 static float GetPredictionCostCrossColorBlue(
    const uint32_t* argb, int stride, int tile_width, int tile_height,
-    VP8LMultipliers prev_x, VP8LMultipliers prev_y,
-    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
-  int histo[256] = { 0 };
+    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_blue,
+    int red_to_blue, const uint32_t accumulated_blue_histo[256]) {
+  uint32_t histo[256] = { 0 };
  float cur_diff;

  VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
@ -635,11 +705,12 @@ static float GetPredictionCostCrossColorBlue(

 #define kGreenRedToBlueNumAxis 8
 #define kGreenRedToBlueMaxIters 7
-static void GetBestGreenRedToBlue(
-    const uint32_t* argb, int stride, int tile_width, int tile_height,
-    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
-    const int accumulated_blue_histo[256],
-    VP8LMultipliers* const best_tx) {
+static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
+                                  int tile_width, int tile_height,
+                                  VP8LMultipliers prev_x,
+                                  VP8LMultipliers prev_y, int quality,
+                                  const uint32_t accumulated_blue_histo[256],
+                                  VP8LMultipliers* const best_tx) {
  const int8_t offset[kGreenRedToBlueNumAxis][2] =
      {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
  const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
@ -684,13 +755,10 @@ static void GetBestGreenRedToBlue(
 #undef kGreenRedToBlueNumAxis

 static VP8LMultipliers GetBestColorTransformForTile(
-    int tile_x, int tile_y, int bits,
-    VP8LMultipliers prev_x,
-    VP8LMultipliers prev_y,
-    int quality, int xsize, int ysize,
-    const int accumulated_red_histo[256],
-    const int accumulated_blue_histo[256],
-    const uint32_t* const argb) {
+    int tile_x, int tile_y, int bits, VP8LMultipliers prev_x,
+    VP8LMultipliers prev_y, int quality, int xsize, int ysize,
+    const uint32_t accumulated_red_histo[256],
+    const uint32_t accumulated_blue_histo[256], const uint32_t* const argb) {
  const int max_tile_size = 1 << bits;
  const int tile_y_offset = tile_y * max_tile_size;
  const int tile_x_offset = tile_x * max_tile_size;
@ -728,13 +796,13 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
 int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                            uint32_t* const argb, uint32_t* image,
                            const WebPPicture* const pic, int percent_range,
-                            int* const percent) {
+                            int* const percent, int* const best_bits) {
  const int max_tile_size = 1 << bits;
  const int tile_xsize = VP8LSubSampleSize(width, bits);
  const int tile_ysize = VP8LSubSampleSize(height, bits);
  int percent_start = *percent;
-  int accumulated_red_histo[256] = { 0 };
-  int accumulated_blue_histo[256] = { 0 };
+  uint32_t accumulated_red_histo[256] = { 0 };
+  uint32_t accumulated_blue_histo[256] = { 0 };
  int tile_x, tile_y;
  VP8LMultipliers prev_x, prev_y;
  MultipliersClear(&prev_y);
@ -788,5 +856,6 @@ int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
      return 0;
    }
  }
+  OptimizeSampling(image, width, height, bits, best_bits);
  return 1;
 }
--- a/src/enc/quant_enc.c
+++ b/src/enc/quant_enc.c
@ -462,7 +462,7 @@ const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
 const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };

 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
-const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
+static const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
 };

@ -478,7 +478,9 @@ void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
  VP8EncPredChroma8(it->yuv_p_, left, top);
 }

-void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
+// Form all the ten Intra4x4 predictions in the yuv_p_ cache
+// for the 4x4 block it->i4_
+static void MakeIntra4Preds(const VP8EncIterator* const it) {
  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
 }

@ -1099,7 +1101,7 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.

    InitScore(&rd_i4);
-    VP8MakeIntra4Preds(it);
+    MakeIntra4Preds(it);
    for (mode = 0; mode < NUM_BMODES; ++mode) {
      VP8ModeScore rd_tmp;
      int16_t tmp_levels[16];
@ -1234,7 +1236,7 @@ static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
      uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
-      VP8MakeIntra4Preds(it);
+      MakeIntra4Preds(it);
      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
                              src, dst, mode) << it->i4_;
    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
@ -1302,7 +1304,7 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

-      VP8MakeIntra4Preds(it);
+      MakeIntra4Preds(it);
      for (mode = 0; mode < NUM_BMODES; ++mode) {
        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
--- a/src/enc/vp8i_enc.h
+++ b/src/enc/vp8i_enc.h
@ -78,7 +78,6 @@ typedef enum {   // Rate-distortion optimization levels
 extern const uint16_t VP8Scan[16];
 extern const uint16_t VP8UVModeOffsets[4];
 extern const uint16_t VP8I16ModeOffsets[4];
-extern const uint16_t VP8I4ModeOffsets[NUM_BMODES];

 // Layout of prediction blocks
 // intra 16x16
@ -267,8 +266,6 @@ typedef struct {
  // in iterator.c
 // must be called first
 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
-// restart a scan
-void VP8IteratorReset(VP8EncIterator* const it);
 // reset iterator position to row 'y'
 void VP8IteratorSetRow(VP8EncIterator* const it, int y);
 // set count down (=number of iterations to go)
@ -444,9 +441,6 @@ extern const uint8_t VP8Cat6[];
 void VP8MakeLuma16Preds(const VP8EncIterator* const it);
 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
 void VP8MakeChroma8Preds(const VP8EncIterator* const it);
-// Form all the ten Intra4x4 predictions in the yuv_p_ cache
-// for the 4x4 block it->i4_
-void VP8MakeIntra4Preds(const VP8EncIterator* const it);
 // Rate calculation
 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
--- a/src/enc/vp8l_enc.c
+++ b/src/enc/vp8l_enc.c
@ -30,6 +30,7 @@

 // Maximum number of histogram images (sub-blocks).
 #define MAX_HUFF_IMAGE_SIZE       2600
+#define MAX_HUFFMAN_BITS (MIN_HUFFMAN_BITS + (1 << NUM_HUFFMAN_BITS) - 1)

 // -----------------------------------------------------------------------------
 // Palette
@ -140,8 +141,8 @@ static int AnalyzeEntropy(const uint32_t* argb,
      curr_row += argb_stride;
    }
    {
-      float entropy_comp[kHistoTotal];
-      float entropy[kNumEntropyIx];
+      uint64_t entropy_comp[kHistoTotal];
+      uint64_t entropy[kNumEntropyIx];
      int k;
      int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen;
      int j;
@ -179,19 +180,19 @@ static int AnalyzeEntropy(const uint32_t* argb,
      // When including transforms, there is an overhead in bits from
      // storing them. This overhead is small but matters for small images.
      // For spatial, there are 14 transformations.
-      entropy[kSpatial] += VP8LSubSampleSize(width, transform_bits) *
+      entropy[kSpatial] += (uint64_t)VP8LSubSampleSize(width, transform_bits) *
                           VP8LSubSampleSize(height, transform_bits) *
                           VP8LFastLog2(14);
      // For color transforms: 24 as only 3 channels are considered in a
      // ColorTransformElement.
-      entropy[kSpatialSubGreen] += VP8LSubSampleSize(width, transform_bits) *
-                                   VP8LSubSampleSize(height, transform_bits) *
-                                   VP8LFastLog2(24);
+      entropy[kSpatialSubGreen] +=
+          (uint64_t)VP8LSubSampleSize(width, transform_bits) *
+          VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(24);
      // For palettes, add the cost of storing the palette.
      // We empirically estimate the cost of a compressed entry as 8 bits.
      // The palette is differential-coded when compressed hence a much
      // lower cost than sizeof(uint32_t)*8.
-      entropy[kPalette] += palette_size * 8;
+      entropy[kPalette] += (palette_size * 8ull) << LOG_2_PRECISION_BITS;

      *min_entropy_ix = kDirect;
      for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
@ -280,7 +281,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
  const int method = config->method;
  const int low_effort = (config->method == 0);
  int i;
-  int use_palette;
+  int use_palette, transform_bits;
  int n_lz77s;
  // If set to 0, analyze the cache with the computed cache value. If 1, also
  // analyze with no-cache.
@ -297,7 +298,9 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
  // Empirical bit sizes.
  enc->histo_bits_ = GetHistoBits(method, use_palette,
                                  pic->width, pic->height);
-  enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
+  transform_bits = GetTransformBits(method, enc->histo_bits_);
+  enc->predictor_transform_bits_ = transform_bits;
+  enc->cross_color_transform_bits_ = transform_bits;

  if (low_effort) {
    // AnalyzeEntropy is somewhat slow.
@ -311,8 +314,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
    // Try out multiple LZ77 on images with few colors.
    n_lz77s = (enc->palette_size_ > 0 && enc->palette_size_ <= 16) ? 2 : 1;
    if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, use_palette,
-                        enc->palette_size_, enc->transform_bits_,
-                        &min_entropy_ix, red_and_blue_always_zero)) {
+                        enc->palette_size_, transform_bits, &min_entropy_ix,
+                        red_and_blue_always_zero)) {
      return 0;
    }
    if (method == 6 && config->quality == 100) {
@ -1064,54 +1067,55 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
  VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
 }

-static int ApplyPredictFilter(const VP8LEncoder* const enc, int width,
-                              int height, int quality, int low_effort,
+static int ApplyPredictFilter(VP8LEncoder* const enc, int width, int height,
+                              int quality, int low_effort,
                              int used_subtract_green, VP8LBitWriter* const bw,
                              int percent_range, int* const percent) {
-  const int pred_bits = enc->transform_bits_;
-  const int transform_width = VP8LSubSampleSize(width, pred_bits);
-  const int transform_height = VP8LSubSampleSize(height, pred_bits);
+  const int min_bits = enc->predictor_transform_bits_;
+  int best_bits;
  // we disable near-lossless quantization if palette is used.
  const int near_lossless_strength =
      enc->use_palette_ ? 100 : enc->config_->near_lossless;

-  if (!VP8LResidualImage(
-          width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_,
-          enc->transform_data_, near_lossless_strength, enc->config_->exact,
-          used_subtract_green, enc->pic_, percent_range / 2, percent)) {
+  if (!VP8LResidualImage(width, height, min_bits, low_effort, enc->argb_,
+                         enc->argb_scratch_, enc->transform_data_,
+                         near_lossless_strength, enc->config_->exact,
+                         used_subtract_green, enc->pic_, percent_range / 2,
+                         percent, &best_bits)) {
    return 0;
  }
  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
  VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
-  assert(pred_bits >= 2);
-  VP8LPutBits(bw, pred_bits - 2, 3);
+  assert(best_bits >= MIN_TRANSFORM_BITS && best_bits <= MAX_TRANSFORM_BITS);
+  VP8LPutBits(bw, best_bits - MIN_TRANSFORM_BITS, NUM_TRANSFORM_BITS);
+  enc->predictor_transform_bits_ = best_bits;
  return EncodeImageNoHuffman(
-      bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
-      (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
+      bw, enc->transform_data_, &enc->hash_chain_, &enc->refs_[0],
+      VP8LSubSampleSize(width, best_bits), VP8LSubSampleSize(height, best_bits),
      quality, low_effort, enc->pic_, percent_range - percent_range / 2,
      percent);
 }

-static int ApplyCrossColorFilter(const VP8LEncoder* const enc, int width,
-                                 int height, int quality, int low_effort,
+static int ApplyCrossColorFilter(VP8LEncoder* const enc, int width, int height,
+                                 int quality, int low_effort,
                                 VP8LBitWriter* const bw, int percent_range,
                                 int* const percent) {
-  const int ccolor_transform_bits = enc->transform_bits_;
-  const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
-  const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
+  const int min_bits = enc->cross_color_transform_bits_;
+  int best_bits;

-  if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
-                               enc->argb_, enc->transform_data_, enc->pic_,
-                               percent_range / 2, percent)) {
+  if (!VP8LColorSpaceTransform(width, height, min_bits, quality, enc->argb_,
+                               enc->transform_data_, enc->pic_,
+                               percent_range / 2, percent, &best_bits)) {
    return 0;
  }
  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
  VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
-  assert(ccolor_transform_bits >= 2);
-  VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
+  assert(best_bits >= MIN_TRANSFORM_BITS && best_bits <= MAX_TRANSFORM_BITS);
+  VP8LPutBits(bw, best_bits - MIN_TRANSFORM_BITS, NUM_TRANSFORM_BITS);
+  enc->cross_color_transform_bits_ = best_bits;
  return EncodeImageNoHuffman(
-      bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
-      (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
+      bw, enc->transform_data_, &enc->hash_chain_, &enc->refs_[0],
+      VP8LSubSampleSize(width, best_bits), VP8LSubSampleSize(height, best_bits),
      quality, low_effort, enc->pic_, percent_range - percent_range / 2,
      percent);
 }
@ -1197,10 +1201,14 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width,
      enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) /
                                                sizeof(uint32_t)
                        : 0;
+  const int min_transform_bits =
+      (enc->predictor_transform_bits_ < enc->cross_color_transform_bits_)
+          ? enc->predictor_transform_bits_
+          : enc->cross_color_transform_bits_;
  const uint64_t transform_data_size =
      (enc->use_predict_ || enc->use_cross_color_)
-          ? (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
-                VP8LSubSampleSize(height, enc->transform_bits_)
+          ? (uint64_t)VP8LSubSampleSize(width, min_transform_bits) *
+                VP8LSubSampleSize(height, min_transform_bits)
          : 0;
  const uint64_t max_alignment_in_words =
      (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t);
@ -1374,13 +1382,11 @@ static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst,
 #undef APPLY_PALETTE_GREEDY_MAX

 // Note: Expects "enc->palette_" to be set properly.
-static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
+static int MapImageFromPalette(VP8LEncoder* const enc) {
  const WebPPicture* const pic = enc->pic_;
  const int width = pic->width;
  const int height = pic->height;
  const uint32_t* const palette = enc->palette_;
-  const uint32_t* src = in_place ? enc->argb_ : pic->argb;
-  const int src_stride = in_place ? enc->current_width_ : pic->argb_stride;
  const int palette_size = enc->palette_size_;
  int xbits;

@ -1395,9 +1401,9 @@ static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
  if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) {
    return 0;
  }
-  if (!ApplyPalette(src, src_stride,
-                     enc->argb_, enc->current_width_,
-                     palette, palette_size, width, height, xbits, pic)) {
+  if (!ApplyPalette(pic->argb, pic->argb_stride, enc->argb_,
+                    enc->current_width_, palette, palette_size, width, height,
+                    xbits, pic)) {
    return 0;
  }
  enc->argb_content_ = kEncoderPalette;
@ -1405,24 +1411,31 @@ static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
 }

 // Save palette_[] to bitstream.
-static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
-                                       VP8LEncoder* const enc,
-                                       int percent_range, int* const percent) {
+static int EncodePalette(VP8LBitWriter* const bw, int low_effort,
+                         VP8LEncoder* const enc, int percent_range,
+                         int* const percent) {
  int i;
  uint32_t tmp_palette[MAX_PALETTE_SIZE];
  const int palette_size = enc->palette_size_;
  const uint32_t* const palette = enc->palette_;
+  // If the last element is 0, do not store it and count on automatic palette
+  // 0-filling. This can only happen if there is no pixel packing, hence if
+  // there are strictly more than 16 colors (after 0 is removed).
+  const uint32_t encoded_palette_size =
+      (enc->palette_[palette_size - 1] == 0 && palette_size > 17)
+          ? palette_size - 1
+          : palette_size;
  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
  VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2);
  assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE);
-  VP8LPutBits(bw, palette_size - 1, 8);
-  for (i = palette_size - 1; i >= 1; --i) {
+  VP8LPutBits(bw, encoded_palette_size - 1, 8);
+  for (i = encoded_palette_size - 1; i >= 1; --i) {
    tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
  }
  tmp_palette[0] = palette[0];
-  return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_,
-                              &enc->refs_[0], palette_size, 1, /*quality=*/20,
-                              low_effort, enc->pic_, percent_range, percent);
+  return EncodeImageNoHuffman(
+      bw, tmp_palette, &enc->hash_chain_, &enc->refs_[0], encoded_palette_size,
+      1, /*quality=*/20, low_effort, enc->pic_, percent_range, percent);
 }

 // -----------------------------------------------------------------------------
@ -1493,7 +1506,6 @@ static int EncodeStreamHook(void* input, void* data2) {
 #endif
  int hdr_size = 0;
  int data_size = 0;
-  int use_delta_palette = 0;
  int idx;
  size_t best_size = ~(size_t)0;
  VP8LBitWriter bw_init = *bw, bw_best;
@ -1558,45 +1570,43 @@ static int EncodeStreamHook(void* input, void* data2) {
        goto Error;
      }
      remaining_percent -= percent_range;
-      if (!MapImageFromPalette(enc, use_delta_palette)) goto Error;
+      if (!MapImageFromPalette(enc)) goto Error;
      // If using a color cache, do not have it bigger than the number of
      // colors.
      if (enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) {
        enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1;
      }
    }
-    if (!use_delta_palette) {
-      // In case image is not packed.
-      if (enc->argb_content_ != kEncoderNearLossless &&
-          enc->argb_content_ != kEncoderPalette) {
-        if (!MakeInputImageCopy(enc)) goto Error;
-      }
+    // In case image is not packed.
+    if (enc->argb_content_ != kEncoderNearLossless &&
+        enc->argb_content_ != kEncoderPalette) {
+      if (!MakeInputImageCopy(enc)) goto Error;
+    }

-      // -----------------------------------------------------------------------
-      // Apply transforms and write transform data.
+    // -------------------------------------------------------------------------
+    // Apply transforms and write transform data.

-      if (enc->use_subtract_green_) {
-        ApplySubtractGreen(enc, enc->current_width_, height, bw);
-      }
+    if (enc->use_subtract_green_) {
+      ApplySubtractGreen(enc, enc->current_width_, height, bw);
+    }

-      if (enc->use_predict_) {
-        percent_range = remaining_percent / 3;
-        if (!ApplyPredictFilter(enc, enc->current_width_, height, quality,
-                                low_effort, enc->use_subtract_green_, bw,
-                                percent_range, &percent)) {
-          goto Error;
-        }
-        remaining_percent -= percent_range;
+    if (enc->use_predict_) {
+      percent_range = remaining_percent / 3;
+      if (!ApplyPredictFilter(enc, enc->current_width_, height, quality,
+                              low_effort, enc->use_subtract_green_, bw,
+                              percent_range, &percent)) {
+        goto Error;
      }
+      remaining_percent -= percent_range;
+    }

-      if (enc->use_cross_color_) {
-        percent_range = remaining_percent / 2;
-        if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
-                                   low_effort, bw, percent_range, &percent)) {
-          goto Error;
-        }
-        remaining_percent -= percent_range;
+    if (enc->use_cross_color_) {
+      percent_range = remaining_percent / 2;
+      if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
+                                 low_effort, bw, percent_range, &percent)) {
+        goto Error;
      }
+      remaining_percent -= percent_range;
    }

    VP8LPutBits(bw, !TRANSFORM_PRESENT, 1);  // No more transforms.
@ -1625,7 +1635,8 @@ static int EncodeStreamHook(void* input, void* data2) {
        if (enc->use_subtract_green_) stats->lossless_features |= 4;
        if (enc->use_palette_) stats->lossless_features |= 8;
        stats->histogram_bits = enc->histo_bits_;
-        stats->transform_bits = enc->transform_bits_;
+        stats->transform_bits = enc->predictor_transform_bits_;
+        stats->cross_color_transform_bits = enc->cross_color_transform_bits_;
        stats->cache_bits = enc->cache_bits_;
        stats->palette_size = enc->palette_size_;
        stats->lossless_size = (int)(best_size - byte_position);
@ -1735,7 +1746,10 @@ int VP8LEncodeStream(const WebPConfig* const config,
        }
        // Copy the values that were computed for the main encoder.
        enc_side->histo_bits_ = enc_main->histo_bits_;
-        enc_side->transform_bits_ = enc_main->transform_bits_;
+        enc_side->predictor_transform_bits_ =
+            enc_main->predictor_transform_bits_;
+        enc_side->cross_color_transform_bits_ =
+            enc_main->cross_color_transform_bits_;
        enc_side->palette_size_ = enc_main->palette_size_;
        memcpy(enc_side->palette_, enc_main->palette_,
               sizeof(enc_main->palette_));
--- a/src/enc/vp8li_enc.h
+++ b/src/enc/vp8li_enc.h
@ -34,7 +34,7 @@ extern "C" {
 #endif

 // maximum value of transform_bits_ in VP8LEncoder.
-#define MAX_TRANSFORM_BITS 6
+#define MAX_TRANSFORM_BITS (MIN_TRANSFORM_BITS + (1 << NUM_TRANSFORM_BITS) - 1)

 typedef enum {
  kEncoderNone = 0,
@ -59,7 +59,8 @@ typedef struct {

  // Encoding parameters derived from quality parameter.
  int histo_bits_;
-  int transform_bits_;    // <= MAX_TRANSFORM_BITS.
+  int predictor_transform_bits_;    // <= MAX_TRANSFORM_BITS
+  int cross_color_transform_bits_;  // <= MAX_TRANSFORM_BITS
  int cache_bits_;        // If equal to 0, don't use color cache.

  // Encoding parameters derived from image characteristics.
@ -106,14 +107,15 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
 // Returns false in case of error (stored in pic->error_code).
 int VP8LResidualImage(int width, int height, int bits, int low_effort,
                      uint32_t* const argb, uint32_t* const argb_scratch,
-                      uint32_t* const image, int near_lossless, int exact,
-                      int used_subtract_green, const WebPPicture* const pic,
-                      int percent_range, int* const percent);
+                      uint32_t* const image, int near_lossless_quality,
+                      int exact, int used_subtract_green,
+                      const WebPPicture* const pic, int percent_range,
+                      int* const percent, int* const best_bits);

 int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                            uint32_t* const argb, uint32_t* image,
                            const WebPPicture* const pic, int percent_range,
-                            int* const percent);
+                            int* const percent, int* const best_bits);

 //------------------------------------------------------------------------------

--- a/src/utils/bit_reader_utils.c
+++ b/src/utils/bit_reader_utils.c
@ -124,7 +124,8 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,

 #if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \
    defined(__i386__) || defined(_M_IX86) || \
-    defined(__x86_64__) || defined(_M_X64)
+    defined(__x86_64__) || defined(_M_X64) || \
+    defined(__wasm__)
 #define VP8L_USE_FAST_LOAD
 #endif

--- a/src/utils/bit_reader_utils.h
+++ b/src/utils/bit_reader_utils.h
@ -69,6 +69,8 @@ extern "C" {
 #define BITS 56
 #elif defined(__mips__)                        // MIPS
 #define BITS 24
+#elif defined(__wasm__)                        // WASM
+#define BITS 56
 #else                                          // reasonable default
 #define BITS 24
 #endif
--- a/src/utils/palette.c
+++ b/src/utils/palette.c
@ -191,6 +191,12 @@ static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted,
  // Find greedily always the closest color of the predicted color to minimize
  // deltas in the palette. This reduces storage needs since the
  // palette is stored with delta encoding.
+  if (num_colors > 17) {
+    if (palette[0] == 0) {
+      --num_colors;
+      SwapColor(&palette[num_colors], &palette[0]);
+    }
+  }
  for (i = 0; i < num_colors; ++i) {
    int best_ix = i;
    uint32_t best_score = ~0U;
@ -384,8 +390,13 @@ int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
                uint32_t* const palette) {
  switch (method) {
    case kSortedDefault:
-      // Nothing to do, we have already sorted the palette.
-      memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
+      if (palette_sorted[0] == 0 && num_colors > 17) {
+        memcpy(palette, palette_sorted + 1,
+               (num_colors - 1) * sizeof(*palette_sorted));
+        palette[num_colors - 1] = 0;
+      } else {
+        memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
+      }
      return 1;
    case kMinimizeDelta:
      PaletteSortMinimizeDeltas(palette_sorted, num_colors, palette);
--- a/src/utils/palette.h
+++ b/src/utils/palette.h
@ -53,6 +53,8 @@ int GetColorPalette(const struct WebPPicture* const pic,
 // Sorts the palette according to the criterion defined by 'method'.
 // 'palette_sorted' is the input palette sorted lexicographically, as done in
 // PrepareMapToPalette. Returns 0 on memory allocation error.
+// For kSortedDefault and kMinimizeDelta, if 0 is present and num_colors > 17,
+// it is moved to the last position to optimize later storage.
 int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
                const uint32_t* const palette_sorted, uint32_t num_colors,
                uint32_t* const palette);
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@ -20,7 +20,7 @@
 extern "C" {
 #endif

-#define WEBP_ENCODER_ABI_VERSION 0x020f    // MAJOR(8b) + MINOR(8b)
+#define WEBP_ENCODER_ABI_VERSION 0x0210  // MAJOR(8b) + MINOR(8b)

 // Note: forward declaring enumerations is not allowed in (strict) C and C++,
 // the types are left here for reference.
@ -145,7 +145,7 @@ struct WebPConfig {
                          // RGB information for better compression. The default
                          // value is 0.

-  int use_delta_palette;  // reserved for future lossless feature
+  int use_delta_palette;  // reserved
  int use_sharp_yuv;      // if needed, use sharp (and slow) RGB->YUV conversion

  int qmin;               // minimum permissible quality factor
@ -224,14 +224,15 @@ struct WebPAuxStats {
  uint32_t lossless_features;  // bit0:predictor bit1:cross-color transform
                               // bit2:subtract-green bit3:color indexing
  int histogram_bits;          // number of precision bits of histogram
-  int transform_bits;          // precision bits for transform
+  int transform_bits;          // precision bits for predictor transform
  int cache_bits;              // number of bits for color cache lookup
  int palette_size;            // number of color in palette, if used
  int lossless_size;           // final lossless size
  int lossless_hdr_size;       // lossless header (transform, huffman etc) size
  int lossless_data_size;      // lossless image data size
+  int cross_color_transform_bits;  // precision bits for cross-color transform

-  uint32_t pad[2];        // padding for later use
+  uint32_t pad[1];  // padding for later use
 };

 // Signature for output function. Should return true if writing was successful.
--- a/src/webp/format_constants.h
+++ b/src/webp/format_constants.h
@ -46,7 +46,12 @@
 #define CODE_LENGTH_CODES            19

 #define MIN_HUFFMAN_BITS             2  // min number of Huffman bits
-#define MAX_HUFFMAN_BITS             9  // max number of Huffman bits
+#define NUM_HUFFMAN_BITS             3
+
+// the maximum number of bits defining a transform is
+// MIN_TRANSFORM_BITS + (1 << NUM_TRANSFORM_BITS) - 1
+#define MIN_TRANSFORM_BITS           2
+#define NUM_TRANSFORM_BITS           3

 #define TRANSFORM_PRESENT            1  // The bit to be written when next data
                                        // to be read is a transform.
--- a/src/webp/types.h
+++ b/src/webp/types.h
@ -36,22 +36,28 @@ typedef long long int int64_t;
 #define WEBP_INLINE __forceinline
 #endif  /* _MSC_VER */

-#if defined(WEBP_ENABLE_NODISCARD) ||                   \
-    (defined(__cplusplus) && __cplusplus >= 201700L) || \
+#ifndef WEBP_NODISCARD
+#if defined(WEBP_ENABLE_NODISCARD) && WEBP_ENABLE_NODISCARD
+#if (defined(__cplusplus) && __cplusplus >= 201703L) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
 #define WEBP_NODISCARD [[nodiscard]]
 #else
-// gcc's __has_attribute does not work for enums.
+// gcc's __attribute__((warn_unused_result)) does not work for enums.
 #if defined(__clang__) && defined(__has_attribute)
 #if __has_attribute(warn_unused_result)
 #define WEBP_NODISCARD __attribute__((warn_unused_result))
 #else
 #define WEBP_NODISCARD
-#endif
+#endif  /* __has_attribute(warn_unused_result) */
 #else
 #define WEBP_NODISCARD
-#endif
-#endif
+#endif  /* defined(__clang__) && defined(__has_attribute) */
+#endif  /* (defined(__cplusplus) && __cplusplus >= 201703L) ||
+           (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) */
+#else
+#define WEBP_NODISCARD
+#endif  /* defined(WEBP_ENABLE_NODISCARD) && WEBP_ENABLE_NODISCARD */
+#endif  /* WEBP_NODISCARD */

 #ifndef WEBP_EXTERN
 // This explicitly marks library functions and allows for changing the
--- a/tests/README.md
+++ b/tests/README.md
@ -11,8 +11,9 @@ https://chromium.googlesource.com/webm/libwebp-test-data
 Follow the [build instructions](../doc/building.md) for libwebp, optionally
 adding build flags for various sanitizers (e.g., -fsanitize=address).

-`fuzzer/makefile.unix` can then be used to compile the fuzzer targets:
+`-DWEBP_BUILD_FUZZTEST=ON` can then be used to compile the fuzzer targets:

 ```shell
-$ make -C fuzzer -f makefile.unix
+$ cmake -B ./build -S . -DWEBP_BUILD_FUZZTEST=ON
+$ make -C build
 ```
--- a/tests/fuzzer/CMakeLists.txt
+++ b/tests/fuzzer/CMakeLists.txt
@ -0,0 +1,69 @@
+#  Copyright (c) 2024 Google LLC
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+if(CMAKE_VERSION VERSION_LESS "3.19.0")
+  message(WARNING "Skipping fuzz tests: CMake 3.19 or newer is required.")
+  return()
+endif()
+
+# Adds a fuzztest built from TEST_NAME.cc located in this directory. Extra
+# arguments are additional libraries the test is linked against.
+function(add_webp_fuzztest TEST_NAME)
+  add_executable(${TEST_NAME} ${TEST_NAME}.cc)
+  # FuzzTest bundles GoogleTest so no need to link to gtest libraries.
+  target_link_libraries(${TEST_NAME} PRIVATE fuzz_utils webp ${ARGN})
+  target_include_directories(${TEST_NAME} PRIVATE ${PROJECT_BINARY_DIR}/src)
+  link_fuzztest(${TEST_NAME})
+  add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
+  set_property(
+    TEST ${TEST_NAME}
+    PROPERTY ENVIRONMENT "TEST_DATA_DIRS=${CMAKE_CURRENT_SOURCE_DIR}/data/")
+endfunction()
+
+enable_language(CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+include(FetchContent)
+
+set(FETCHCONTENT_QUIET FALSE)
+set(fuzztest_SOURCE_DIR ${CMAKE_BINARY_DIR}/_deps/fuzztest-src)
+FetchContent_Declare(
+  fuzztest
+  GIT_REPOSITORY https://github.com/google/fuzztest.git
+  GIT_TAG 078ea0871cc96d3a69bad406577f176a4fa14ae9
+  GIT_PROGRESS TRUE
+  PATCH_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/patch.sh)
+
+FetchContent_MakeAvailable(fuzztest)
+
+fuzztest_setup_fuzzing_flags()
+
+add_library(fuzz_utils fuzz_utils.h fuzz_utils.cc img_alpha.h img_grid.h
+                       img_peak.h)
+target_link_libraries(fuzz_utils PUBLIC webpdecoder)
+link_fuzztest(fuzz_utils)
+
+add_webp_fuzztest(advanced_api_fuzzer)
+add_webp_fuzztest(dec_fuzzer)
+add_webp_fuzztest(enc_dec_fuzzer)
+add_webp_fuzztest(enc_fuzzer imagedec)
+add_webp_fuzztest(huffman_fuzzer)
+add_webp_fuzztest(imageio_fuzzer imagedec)
+add_webp_fuzztest(simple_api_fuzzer)
+
+if(WEBP_BUILD_LIBWEBPMUX)
+  add_webp_fuzztest(animation_api_fuzzer webpdemux)
+  add_webp_fuzztest(animdecoder_fuzzer imageioutil webpdemux)
+  add_webp_fuzztest(animencoder_fuzzer libwebpmux)
+  add_webp_fuzztest(mux_demux_api_fuzzer libwebpmux webpdemux)
+endif()
+
+if(WEBP_BUILD_WEBPINFO)
+  add_webp_fuzztest(webp_info_fuzzer imageioutil)
+endif()
--- a/tests/fuzzer/advanced_api_fuzzer.cc
+++ b/tests/fuzzer/advanced_api_fuzzer.cc
@ -14,54 +14,60 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

-#include <stdint.h>
-#include <string.h>
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <string_view>

 #include "./fuzz_utils.h"
+#include "src/dec/webpi_dec.h"
 #include "src/utils/rescaler_utils.h"
 #include "src/webp/decode.h"

-int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
+namespace {
+
+void AdvancedApiTest(std::string_view blob, uint8_t factor_u8, bool flip,
+                     bool bypass_filtering, bool no_fancy_upsampling,
+                     bool use_threads, bool use_cropping, bool use_scaling,
+                     bool use_dithering, int colorspace, bool incremental) {
  WebPDecoderConfig config;
-  if (!WebPInitDecoderConfig(&config)) return 0;
-  if (WebPGetFeatures(data, size, &config.input) != VP8_STATUS_OK) return 0;
-  if ((size_t)config.input.width * config.input.height > kFuzzPxLimit) return 0;
+  if (!WebPInitDecoderConfig(&config)) return;
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(blob.data());
+  const size_t size = blob.size();
+  if (WebPGetFeatures(data, size, &config.input) != VP8_STATUS_OK) return;
+  if ((size_t)config.input.width * config.input.height >
+      fuzz_utils::kFuzzPxLimit) {
+    return;
+  }

  // Using two independent criteria ensures that all combinations of options
  // can reach each path at the decoding stage, with meaningful differences.

-  const uint8_t value = FuzzHash(data, size);
-  const float factor = value / 255.f;  // 0-1
+  const uint8_t value = fuzz_utils::FuzzHash(data, size);
+  const float factor = factor_u8 / 255.f;  // 0-1

-  config.options.flip = value & 1;
-  config.options.bypass_filtering = value & 2;
-  config.options.no_fancy_upsampling = value & 4;
-  config.options.use_threads = value & 8;
-  if (size & 1) {
+  config.options.flip = flip;
+  config.options.bypass_filtering = bypass_filtering;
+  config.options.no_fancy_upsampling = no_fancy_upsampling;
+  config.options.use_threads = use_threads;
+  if (use_cropping) {
    config.options.use_cropping = 1;
    config.options.crop_width = (int)(config.input.width * (1 - factor));
    config.options.crop_height = (int)(config.input.height * (1 - factor));
    config.options.crop_left = config.input.width - config.options.crop_width;
    config.options.crop_top = config.input.height - config.options.crop_height;
  }
-  if (size & 2) {
+  if (use_dithering) {
    int strength = (int)(factor * 100);
    config.options.dithering_strength = strength;
    config.options.alpha_dithering_strength = 100 - strength;
  }
-  if (size & 4) {
+  if (use_scaling) {
    config.options.use_scaling = 1;
    config.options.scaled_width = (int)(config.input.width * factor * 2);
    config.options.scaled_height = (int)(config.input.height * factor * 2);
  }
-
-#if defined(WEBP_REDUCE_CSP)
-  config.output.colorspace = (value & 1)
-                                 ? ((value & 2) ? MODE_RGBA : MODE_BGRA)
-                                 : ((value & 2) ? MODE_rgbA : MODE_bgrA);
-#else
-  config.output.colorspace = (WEBP_CSP_MODE)(value % MODE_LAST);
-#endif  // WEBP_REDUCE_CSP
+  config.output.colorspace = static_cast<WEBP_CSP_MODE>(colorspace);

  for (int i = 0; i < 2; ++i) {
    if (i == 1) {
@ -75,12 +81,25 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {

      // Skip easily avoidable out-of-memory fuzzing errors.
      if (config.options.use_scaling) {
+        int input_width = config.input.width;
+        int input_height = config.input.height;
+        if (config.options.use_cropping) {
+          const int cw = config.options.crop_width;
+          const int ch = config.options.crop_height;
+          const int x = config.options.crop_left & ~1;
+          const int y = config.options.crop_top & ~1;
+          if (WebPCheckCropDimensions(input_width, input_height, x, y, cw,
+                                      ch)) {
+            input_width = cw;
+            input_height = ch;
+          }
+        }
+
        int scaled_width = config.options.scaled_width;
        int scaled_height = config.options.scaled_height;
-        if (WebPRescalerGetScaledDimensions(config.input.width,
-                                            config.input.height, &scaled_width,
-                                            &scaled_height)) {
-          size_t fuzz_px_limit = kFuzzPxLimit;
+        if (WebPRescalerGetScaledDimensions(input_width, input_height,
+                                            &scaled_width, &scaled_height)) {
+          size_t fuzz_px_limit = fuzz_utils::kFuzzPxLimit;
          if (scaled_width != config.input.width ||
              scaled_height != config.input.height) {
            // Using the WebPRescalerImport internally can significantly slow
@ -92,18 +111,18 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
          // very wide input image to a very tall canvas can be as slow as
          // decoding a huge number of pixels. Avoid timeouts due to these.
          const uint64_t max_num_operations =
-              (uint64_t)Max(scaled_width, config.input.width) *
-              Max(scaled_height, config.input.height);
+              (uint64_t)std::max(scaled_width, config.input.width) *
+              std::max(scaled_height, config.input.height);
          if (max_num_operations > fuzz_px_limit) {
            break;
          }
        }
      }
    }
-    if (size % 3) {
+    if (incremental) {
      // Decodes incrementally in chunks of increasing size.
      WebPIDecoder* idec = WebPIDecode(NULL, 0, &config);
-      if (!idec) return 0;
+      if (!idec) return;
      VP8StatusCode status;
      if (size & 8) {
        size_t available_size = value + 1;
@ -135,5 +154,28 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {

    WebPFreeDecBuffer(&config.output);
  }
-  return 0;
 }
+
+}  // namespace
+
+FUZZ_TEST(AdvancedApi, AdvancedApiTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1),
+        /*factor_u8=*/fuzztest::Arbitrary<uint8_t>(),
+        /*flip=*/fuzztest::Arbitrary<bool>(),
+        /*bypass_filtering=*/fuzztest::Arbitrary<bool>(),
+        /*no_fancy_upsampling=*/fuzztest::Arbitrary<bool>(),
+        /*use_threads=*/fuzztest::Arbitrary<bool>(),
+        /*use_cropping=*/fuzztest::Arbitrary<bool>(),
+        /*use_scaling=*/fuzztest::Arbitrary<bool>(),
+        /*use_dithering=*/fuzztest::Arbitrary<bool>(),
+#if defined(WEBP_REDUCE_CSP)
+        fuzztest::ElementOf<int>({static_cast<int>(MODE_RGBA),
+                                  static_cast<int>(MODE_BGRA),
+                                  static_cast<int>(MODE_rgbA),
+                                  static_cast<int>(MODE_bgrA)}),
+#else
+        fuzztest::InRange<int>(0, static_cast<int>(MODE_LAST) - 1),
+#endif
+        /*incremental=*/fuzztest::Arbitrary<bool>());
--- a/tests/fuzzer/animation_api_fuzzer.cc
+++ b/tests/fuzzer/animation_api_fuzzer.cc
@ -14,37 +14,46 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
 #include "./fuzz_utils.h"
 #include "src/webp/decode.h"
 #include "src/webp/demux.h"
 #include "src/webp/mux_types.h"

-int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
+namespace {
+
+void AnimationApiTest(std::string_view blob, bool use_threads,
+                      WEBP_CSP_MODE color_mode) {
+  const size_t size = blob.size();
  WebPData webp_data;
  WebPDataInit(&webp_data);
  webp_data.size = size;
-  webp_data.bytes = data;
+  webp_data.bytes = reinterpret_cast<const uint8_t*>(blob.data());

  // WebPAnimDecoderNew uses WebPDemux internally to calloc canvas size.
  WebPDemuxer* const demux = WebPDemux(&webp_data);
-  if (!demux) return 0;
+  if (!demux) return;
  const uint32_t cw = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
  const uint32_t ch = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
-  if ((size_t)cw * ch > kFuzzPxLimit) {
+  if ((size_t)cw * ch > fuzz_utils::kFuzzPxLimit) {
    WebPDemuxDelete(demux);
-    return 0;
+    return;
  }

  // In addition to canvas size, check each frame separately.
  WebPIterator iter;
-  for (int i = 0; i < kFuzzFrameLimit; i++) {
+  for (int i = 0; i < fuzz_utils::kFuzzFrameLimit; i++) {
    if (!WebPDemuxGetFrame(demux, i + 1, &iter)) break;
    int w, h;
    if (WebPGetInfo(iter.fragment.bytes, iter.fragment.size, &w, &h)) {
-      if ((size_t)w * h > kFuzzPxLimit) {  // image size of the frame payload
+      if ((size_t)w * h >
+          fuzz_utils::kFuzzPxLimit) {  // image size of the frame payload
        WebPDemuxReleaseIterator(&iter);
        WebPDemuxDelete(demux);
-        return 0;
+        return;
      }
    }
  }
@ -53,26 +62,30 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
  WebPDemuxDelete(demux);

  WebPAnimDecoderOptions dec_options;
-  if (!WebPAnimDecoderOptionsInit(&dec_options)) return 0;
+  if (!WebPAnimDecoderOptionsInit(&dec_options)) return;

-  dec_options.use_threads = size & 1;
-  // Animations only support 4 (of 12) modes.
-  dec_options.color_mode = (WEBP_CSP_MODE)(size % MODE_LAST);
-  if (dec_options.color_mode != MODE_BGRA &&
-      dec_options.color_mode != MODE_rgbA &&
-      dec_options.color_mode != MODE_bgrA) {
-    dec_options.color_mode = MODE_RGBA;
-  }
+  dec_options.use_threads = use_threads;
+  dec_options.color_mode = color_mode;

  WebPAnimDecoder* dec = WebPAnimDecoderNew(&webp_data, &dec_options);
-  if (!dec) return 0;
+  if (!dec) return;

-  for (int i = 0; i < kFuzzFrameLimit; i++) {
+  for (int i = 0; i < fuzz_utils::kFuzzFrameLimit; i++) {
    uint8_t* buf;
    int timestamp;
    if (!WebPAnimDecoderGetNext(dec, &buf, &timestamp)) break;
  }

  WebPAnimDecoderDelete(dec);
-  return 0;
 }
+
+}  // namespace
+
+FUZZ_TEST(AnimationApi, AnimationApiTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1),
+        /*use_threads=*/fuzztest::Arbitrary<bool>(),
+        // Animations only support 4 (out of 12) modes.
+        fuzztest::ElementOf<WEBP_CSP_MODE>({MODE_RGBA, MODE_BGRA, MODE_rgbA,
+                                            MODE_bgrA}));
--- a/tests/fuzzer/animdecoder_fuzzer.cc
+++ b/tests/fuzzer/animdecoder_fuzzer.cc
@ -16,13 +16,20 @@

 #include <cstddef>
 #include <cstdint>
+#include <string_view>

+#include "./fuzz_utils.h"
 #include "imageio/imageio_util.h"
 #include "src/webp/decode.h"
 #include "src/webp/demux.h"
 #include "src/webp/mux_types.h"

-extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+namespace {
+
+void AnimDecoderTest(std::string_view blob) {
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(blob.data());
+  const size_t size = blob.size();
+
  // WebPAnimDecoderGetInfo() is too late to check the canvas size as
  // WebPAnimDecoderNew() will handle the allocations.
  const size_t kMaxNumBytes = 2684354560;  // RSS (resident set size) limit.
@ -34,14 +41,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
                                             features.height) ||
        static_cast<size_t>(features.width) * features.height >
            kMaxNumPixelsSafe) {
-      return 0;
+      return;
    }
  }

  // decode everything as an animation
  WebPData webp_data = {data, size};
  WebPAnimDecoder* const dec = WebPAnimDecoderNew(&webp_data, nullptr);
-  if (dec == nullptr) return 0;
+  if (dec == nullptr) return;

  WebPAnimInfo info;
  if (!WebPAnimDecoderGetInfo(dec, &info)) goto End;
@ -57,5 +64,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  }
 End:
  WebPAnimDecoderDelete(dec);
-  return 0;
 }
+
+}  // namespace
+
+FUZZ_TEST(AnimDecoder, AnimDecoderTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1));
--- a/tests/fuzzer/animencoder_fuzzer.cc
+++ b/tests/fuzzer/animencoder_fuzzer.cc
@ -14,21 +14,48 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

-#include <stdio.h>
-#include <stdlib.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <string_view>
+#include <utility>
+#include <vector>

 #include "./fuzz_utils.h"
+#include "src/dsp/cpu.h"
 #include "src/webp/encode.h"
 #include "src/webp/mux.h"
+#include "src/webp/mux_types.h"

 namespace {

-const VP8CPUInfo default_VP8GetCPUInfo = VP8GetCPUInfo;
+const VP8CPUInfo default_VP8GetCPUInfo = fuzz_utils::VP8GetCPUInfo;
+
+struct FrameConfig {
+  int use_argb;
+  int timestamp;
+  WebPConfig webp_config;
+  fuzz_utils::CropOrScaleParams crop_or_scale_params;
+  int source_image_index;
+};
+
+auto ArbitraryKMinKMax() {
+  return fuzztest::FlatMap(
+      [](int kmax) {
+        const int min_kmin = (kmax > 1) ? (kmax / 2) : 0;
+        const int max_kmin = (kmax > 1) ? (kmax - 1) : 0;
+        return fuzztest::PairOf(fuzztest::InRange(min_kmin, max_kmin),
+                                fuzztest::Just(kmax));
+      },
+      fuzztest::InRange(0, 15));
+}

 int AddFrame(WebPAnimEncoder** const enc,
             const WebPAnimEncoderOptions& anim_config, int* const width,
-             int* const height, int timestamp_ms, const uint8_t data[],
-             size_t size, uint32_t* const bit_pos) {
+             int* const height, int timestamp_ms,
+             const FrameConfig& frame_config, const uint8_t data[], size_t size,
+             uint32_t* const bit_pos) {
  if (enc == nullptr || width == nullptr || height == nullptr) {
    fprintf(stderr, "NULL parameters.\n");
    if (enc != nullptr) WebPAnimEncoderDelete(*enc);
@ -36,27 +63,12 @@ int AddFrame(WebPAnimEncoder** const enc,
  }

  // Init the source picture.
-  WebPPicture pic;
-  if (!WebPPictureInit(&pic)) {
-    fprintf(stderr, "WebPPictureInit failed.\n");
-    WebPAnimEncoderDelete(*enc);
-    abort();
-  }
-  pic.use_argb = Extract(1, data, size, bit_pos);
-
-  // Read the source picture.
-  if (!ExtractSourcePicture(&pic, data, size, bit_pos)) {
-    const WebPEncodingError error_code = pic.error_code;
-    WebPAnimEncoderDelete(*enc);
-    WebPPictureFree(&pic);
-    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return 0;
-    fprintf(stderr, "Can't read input image. Error code: %d\n", error_code);
-    abort();
-  }
+  WebPPicture pic = fuzz_utils::GetSourcePicture(
+      frame_config.source_image_index, frame_config.use_argb);

  // Crop and scale.
  if (*enc == nullptr) {  // First frame will set canvas width and height.
-    if (!ExtractAndCropOrScale(&pic, data, size, bit_pos)) {
+    if (!fuzz_utils::CropOrScale(&pic, frame_config.crop_or_scale_params)) {
      const WebPEncodingError error_code = pic.error_code;
      WebPPictureFree(&pic);
      if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return 0;
@ -89,13 +101,7 @@ int AddFrame(WebPAnimEncoder** const enc,
  }

  // Create frame encoding config.
-  WebPConfig config;
-  if (!ExtractWebPConfig(&config, data, size, bit_pos)) {
-    fprintf(stderr, "ExtractWebPConfig failed.\n");
-    WebPAnimEncoderDelete(*enc);
-    WebPPictureFree(&pic);
-    abort();
-  }
+  WebPConfig config = frame_config.webp_config;
  // Skip slow settings on big images, it's likely to timeout.
  if (pic.width * pic.height > 32 * 32) {
    config.method = (config.method > 4) ? 4 : config.method;
@ -125,14 +131,17 @@ int AddFrame(WebPAnimEncoder** const enc,
  return 1;
 }

-}  // namespace
-
-extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
+void AnimEncoderTest(std::string_view blob, bool minimize_size,
+                     std::pair<int, int> kmin_kmax, bool allow_mixed,
+                     const std::vector<FrameConfig>& frame_configs,
+                     int optimization_index) {
  WebPAnimEncoder* enc = nullptr;
  int width = 0, height = 0, timestamp_ms = 0;
  uint32_t bit_pos = 0;
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(blob.data());
+  const size_t size = blob.size();

-  ExtractAndDisableOptimizations(default_VP8GetCPUInfo, data, size, &bit_pos);
+  fuzz_utils::SetOptimization(default_VP8GetCPUInfo, optimization_index);

  // Extract a configuration from the packed bits.
  WebPAnimEncoderOptions anim_config;
@ -140,26 +149,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
    fprintf(stderr, "WebPAnimEncoderOptionsInit failed.\n");
    abort();
  }
-  anim_config.minimize_size = Extract(1, data, size, &bit_pos);
-  anim_config.kmax = Extract(15, data, size, &bit_pos);
-  const int min_kmin = (anim_config.kmax > 1) ? (anim_config.kmax / 2) : 0;
-  const int max_kmin = (anim_config.kmax > 1) ? (anim_config.kmax - 1) : 0;
-  anim_config.kmin =
-      min_kmin + Extract((uint32_t)(max_kmin - min_kmin), data, size, &bit_pos);
-  anim_config.allow_mixed = Extract(1, data, size, &bit_pos);
+  anim_config.minimize_size = minimize_size;
+  anim_config.kmin = kmin_kmax.first;
+  anim_config.kmax = kmin_kmax.second;
+  anim_config.allow_mixed = allow_mixed;
  anim_config.verbose = 0;

-  const int nb_frames = 1 + Extract(15, data, size, &bit_pos);
-
  // For each frame.
-  for (int i = 0; i < nb_frames; ++i) {
-    if (!AddFrame(&enc, anim_config, &width, &height, timestamp_ms, data, size,
-                  &bit_pos)) {
-      return 0;
+  for (const FrameConfig& frame_config : frame_configs) {
+    if (!AddFrame(&enc, anim_config, &width, &height, timestamp_ms,
+                  frame_config, data, size, &bit_pos)) {
+      return;
    }

-    timestamp_ms += (1 << (2 + Extract(15, data, size, &bit_pos))) +
-                    Extract(1, data, size, &bit_pos);  // [1..131073], arbitrary
+    timestamp_ms += frame_config.timestamp;
  }

  // Assemble.
@ -184,5 +187,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {

  WebPAnimEncoderDelete(enc);
  WebPDataClear(&webp_data);
-  return 0;
 }
+
+}  // namespace
+
+FUZZ_TEST(AnimEncoder, AnimEncoderTest)
+    .WithDomains(
+        fuzztest::String(),
+        /*minimize_size=*/fuzztest::Arbitrary<bool>(), ArbitraryKMinKMax(),
+        /*allow_mixed=*/fuzztest::Arbitrary<bool>(),
+        fuzztest::VectorOf(
+            fuzztest::StructOf<FrameConfig>(
+                fuzztest::InRange<int>(0, 1), fuzztest::InRange<int>(0, 131073),
+                fuzz_utils::ArbitraryWebPConfig(),
+                fuzz_utils::ArbitraryCropOrScaleParams(),
+                fuzztest::InRange<int>(0, fuzz_utils::kNumSourceImages - 1)))
+            .WithMinSize(1)
+            .WithMaxSize(15),
+        /*optimization_index=*/
+        fuzztest::InRange<uint32_t>(0, fuzz_utils::kMaxOptimizationIndex));
--- a/tests/fuzzer/dec_fuzzer.cc
+++ b/tests/fuzzer/dec_fuzzer.cc
@ -0,0 +1,48 @@
+// Copyright 2024 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <cstdint>
+#include <cstdio>
+#include <string_view>
+
+#include "src/webp/decode.h"
+#include "tests/fuzzer/fuzz_utils.h"
+
+namespace {
+
+// Feeds an arbitrary byte string to WebPDecode() using a default decoder
+// configuration. A failed decode is fine; a suspended or user-aborted one is
+// treated as a bug and aborts the process.
+void DecodeWebP(std::string_view arbitrary_bytes) {
+  WebPDecoderConfig config;
+  const bool initialized = WebPInitDecoderConfig(&config) != 0;
+  if (!initialized) {
+    fprintf(stderr, "WebPInitDecoderConfig failed.\n");
+    abort();
+  }
+
+  const uint8_t* const bytes =
+      reinterpret_cast<const uint8_t*>(arbitrary_bytes.data());
+  const VP8StatusCode result =
+      WebPDecode(bytes, arbitrary_bytes.size(), &config);
+  // The output buffer is released before the status is inspected.
+  WebPFreeDecBuffer(&config.output);
+
+  // The decoding may fail (because the fuzzed input can be anything) but not
+  // for these reasons.
+  const bool unexpected_status =
+      (result == VP8_STATUS_SUSPENDED) || (result == VP8_STATUS_USER_ABORT);
+  if (unexpected_status) abort();
+}
+
+// Registers DecodeWebP with FuzzTest. Inputs are arbitrary strings of at most
+// fuzz_utils::kMaxWebPFileSize + 1 bytes.
+FUZZ_TEST(WebPSuite, DecodeWebP)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1));
+
+}  // namespace
--- a/tests/fuzzer/enc_dec_fuzzer.cc
+++ b/tests/fuzzer/enc_dec_fuzzer.cc
@ -14,57 +14,37 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

-#include <stdio.h>
-#include <stdlib.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>

 #include "./fuzz_utils.h"
+#include "src/dsp/cpu.h"
 #include "src/webp/decode.h"
 #include "src/webp/encode.h"

 namespace {

-const VP8CPUInfo default_VP8GetCPUInfo = VP8GetCPUInfo;
+const VP8CPUInfo default_VP8GetCPUInfo = fuzz_utils::VP8GetCPUInfo;

-}  // namespace
-
-extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
-  uint32_t bit_pos = 0;
-
-  ExtractAndDisableOptimizations(default_VP8GetCPUInfo, data, size, &bit_pos);
+void EncDecTest(bool use_argb, int source_image_index, WebPConfig config,
+                int optimization_index,
+                const fuzz_utils::CropOrScaleParams& crop_or_scale_params) {
+  fuzz_utils::SetOptimization(default_VP8GetCPUInfo, optimization_index);

  // Init the source picture.
-  WebPPicture pic;
-  if (!WebPPictureInit(&pic)) {
-    fprintf(stderr, "WebPPictureInit failed.\n");
-    abort();
-  }
-  pic.use_argb = Extract(1, data, size, &bit_pos);
-
-  // Read the source picture.
-  if (!ExtractSourcePicture(&pic, data, size, &bit_pos)) {
-    const WebPEncodingError error_code = pic.error_code;
-    WebPPictureFree(&pic);
-    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return 0;
-    fprintf(stderr, "Can't read input image. Error code: %d\n", error_code);
-    abort();
-  }
+  WebPPicture pic = fuzz_utils::GetSourcePicture(source_image_index, use_argb);

  // Crop and scale.
-  if (!ExtractAndCropOrScale(&pic, data, size, &bit_pos)) {
+  if (!fuzz_utils::CropOrScale(&pic, crop_or_scale_params)) {
    const WebPEncodingError error_code = pic.error_code;
    WebPPictureFree(&pic);
-    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return 0;
+    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return;
    fprintf(stderr, "ExtractAndCropOrScale failed. Error code: %d\n",
            error_code);
    abort();
  }

-  // Extract a configuration from the packed bits.
-  WebPConfig config;
-  if (!ExtractWebPConfig(&config, data, size, &bit_pos)) {
-    fprintf(stderr, "ExtractWebPConfig failed.\n");
-    abort();
-  }
  // Skip slow settings on big images, it's likely to timeout.
  if (pic.width * pic.height > 32 * 32) {
    if (config.lossless) {
@ -93,7 +73,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
    WebPPictureFree(&pic);
    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY ||
        error_code == VP8_ENC_ERROR_BAD_WRITE) {
-      return 0;
+      return;
    }
    fprintf(stderr, "WebPEncode failed. Error code: %d\n", error_code);
    abort();
@ -157,5 +137,16 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
  WebPFreeDecBuffer(&dec_config.output);
  WebPMemoryWriterClear(&memory_writer);
  WebPPictureFree(&pic);
-  return 0;
 }
+
+}  // namespace
+
+// Registers EncDecTest with FuzzTest. Domains are listed in the order of the
+// EncDecTest parameters: use_argb, source_image_index, config,
+// optimization_index, crop_or_scale_params.
+FUZZ_TEST(EncDec, EncDecTest)
+    .WithDomains(/*use_argb=*/fuzztest::Arbitrary<bool>(),
+                 /*source_image_index=*/
+                 fuzztest::InRange<int>(0, fuzz_utils::kNumSourceImages - 1),
+                 fuzz_utils::ArbitraryWebPConfig(),
+                 /*optimization_index=*/
+                 fuzztest::InRange<uint32_t>(0,
+                                             fuzz_utils::kMaxOptimizationIndex),
+                 fuzz_utils::ArbitraryCropOrScaleParams());
--- a/tests/fuzzer/enc_fuzzer.cc
+++ b/tests/fuzzer/enc_fuzzer.cc
@ -0,0 +1,140 @@
+// Copyright 2024 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <string_view>
+
+#include "imageio/image_dec.h"
+#include "src/dsp/cpu.h"
+#include "src/webp/decode.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"
+#include "tests/fuzzer/fuzz_utils.h"
+
+namespace {
+
+const VP8CPUInfo default_VP8GetCPUInfo = fuzz_utils::VP8GetCPUInfo;
+
+// Reads 'file' as an image with the reader chosen by WebPGuessImageReader(),
+// optionally crops or rescales it, encodes it with 'config', decodes the
+// result and, for exact lossless settings, checks that the decoded pixels
+// match the source.
+//
+// Returns silently when the input is too big, is not a readable image, or
+// when a step fails with an out-of-memory error. Any other failure aborts.
+void EncTest(std::string_view file, uint32_t optimization_index, bool use_argb,
+             WebPConfig config,
+             const fuzz_utils::CropOrScaleParams& crop_or_scale_params) {
+  fuzz_utils::SetOptimization(default_VP8GetCPUInfo, optimization_index);
+
+  // Init the source picture.
+  WebPPicture pic;
+  if (!WebPPictureInit(&pic)) {
+    std::cerr << "WebPPictureInit failed.\n";
+    abort();
+  }
+  pic.use_argb = use_argb;
+
+  const uint8_t* const file_data =
+      reinterpret_cast<const uint8_t*>(file.data());
+  // Nothing has been allocated in 'pic' yet, so a plain return is enough.
+  if (fuzz_utils::IsImageTooBig(file_data, file.size())) return;
+  WebPImageReader reader = WebPGuessImageReader(file_data, file.size());
+  if (!reader(file_data, file.size(), &pic, 1, NULL)) {
+    // Release whatever the reader may have allocated before it failed.
+    WebPPictureFree(&pic);
+    return;
+  }
+
+  // Crop and scale.
+  if (!fuzz_utils::CropOrScale(&pic, crop_or_scale_params)) {
+    const WebPEncodingError error_code = pic.error_code;
+    WebPPictureFree(&pic);
+    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return;
+    std::cerr << "CropOrScale failed. Error code: " << error_code << "\n";
+    abort();
+  }
+
+  // Skip the cruncher except on small images, it's likely to timeout.
+  if (config.lossless && config.quality == 100. && config.method == 6 &&
+      pic.width * pic.height >= 16384) {
+    config.lossless = 0;
+  }
+
+  // Encode.
+  WebPMemoryWriter memory_writer;
+  WebPMemoryWriterInit(&memory_writer);
+  pic.writer = WebPMemoryWrite;
+  pic.custom_ptr = &memory_writer;
+  if (!WebPEncode(&config, &pic)) {
+    const WebPEncodingError error_code = pic.error_code;
+    WebPMemoryWriterClear(&memory_writer);
+    WebPPictureFree(&pic);
+    if (error_code == VP8_ENC_ERROR_OUT_OF_MEMORY) return;
+    std::cerr << "WebPEncode failed. Error code: " << error_code
+              << " \nFile starts with: " << file.substr(0, 20) << "\n";
+    abort();
+  }
+
+  // Try decoding the result.
+  int w, h;
+  const uint8_t* const out_data = memory_writer.mem;
+  const size_t out_size = memory_writer.size;
+  uint8_t* const rgba = WebPDecodeBGRA(out_data, out_size, &w, &h);
+  if (rgba == nullptr || w != pic.width || h != pic.height) {
+    std::cerr << "WebPDecodeBGRA failed.\nFile starts with: "
+              << file.substr(0, 20) << "\n";
+    WebPFree(rgba);
+    WebPMemoryWriterClear(&memory_writer);
+    WebPPictureFree(&pic);
+    abort();
+  }
+
+  // Compare the results if exact encoding.
+  if (pic.use_argb && config.lossless && config.near_lossless == 100) {
+    const uint32_t* src1 = (const uint32_t*)rgba;
+    const uint32_t* src2 = pic.argb;
+    for (int y = 0; y < h; ++y, src1 += w, src2 += pic.argb_stride) {
+      for (int x = 0; x < w; ++x) {
+        uint32_t v1 = src1[x], v2 = src2[x];
+        if (!config.exact) {
+          if ((v1 & 0xff000000u) == 0 || (v2 & 0xff000000u) == 0) {
+            // Only keep alpha for comparison of fully transparent area.
+            v1 &= 0xff000000u;
+            v2 &= 0xff000000u;
+          }
+        }
+        if (v1 != v2) {
+          std::cerr
+              << "Lossless compression failed pixel-exactness.\nFile starts "
+                 "with: "
+              << file.substr(0, 20) << "\n";
+          WebPFree(rgba);
+          WebPMemoryWriterClear(&memory_writer);
+          WebPPictureFree(&pic);
+          abort();
+        }
+      }
+    }
+  }
+
+  WebPFree(rgba);
+  WebPMemoryWriterClear(&memory_writer);
+  WebPPictureFree(&pic);
+}
+
+}  // namespace
+
+// Registers EncTest with FuzzTest. Domains are listed in the order of the
+// EncTest parameters; the first one provides the raw bytes passed as 'file'.
+FUZZ_TEST(Enc, EncTest)
+    .WithDomains(
+        fuzztest::Arbitrary<std::string>(),
+        /*optimization_index=*/
+        fuzztest::InRange<uint32_t>(0, fuzz_utils::kMaxOptimizationIndex),
+        /*use_argb=*/fuzztest::Arbitrary<bool>(),
+        fuzz_utils::ArbitraryWebPConfig(),
+        fuzz_utils::ArbitraryCropOrScaleParams());
--- a/tests/fuzzer/fuzz_utils.cc
+++ b/tests/fuzzer/fuzz_utils.cc
@ -0,0 +1,201 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "./fuzz_utils.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <vector>
+
+#include "./img_alpha.h"
+#include "./img_grid.h"
+#include "./img_peak.h"
+#include "src/dsp/cpu.h"
+#include "src/webp/decode.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"
+
+namespace fuzz_utils {
+
+// Returns a picture filled with the pixels of the embedded source image
+// number 'image_index', which must be in [0, kNumSourceImages). 'use_argb'
+// is copied to the picture before the pixels are imported. Aborts if the
+// picture cannot be initialized or the import fails.
+WebPPicture GetSourcePicture(int image_index, bool use_argb) {
+  // The index selects an entry in three parallel tables; reject out-of-range
+  // values in debug builds, as SetOptimization() does for its own index.
+  assert(image_index >= 0 &&
+         static_cast<size_t>(image_index) < kNumSourceImages);
+
+  WebPPicture pic;
+  if (!WebPPictureInit(&pic)) abort();
+  pic.use_argb = use_argb;
+
+  // Pick a source picture.
+  const int kImagesWidth[] = {kImgAlphaWidth, kImgGridWidth, kImgPeakWidth};
+  const int kImagesHeight[] = {kImgAlphaHeight, kImgGridHeight, kImgPeakHeight};
+  const uint8_t* const image_data = kImagesData[image_index];
+  pic.width = kImagesWidth[image_index];
+  pic.height = kImagesHeight[image_index];
+  // Also passed below as the stride of the RGBA source buffer.
+  pic.argb_stride = pic.width * 4 * sizeof(uint8_t);
+
+  // Read the bytes.
+  if (!WebPPictureImportRGBA(&pic, image_data, pic.argb_stride)) abort();
+  return pic;
+}
+
+//------------------------------------------------------------------------------
+
+// Crops or rescales 'pic' in place according to 'params'.
+//
+// Nothing is done when params.alter_input is false, or when WEBP_REDUCE_SIZE
+// is defined. Otherwise the picture is cropped if params.crop_or_scale is
+// true and rescaled if it is false.
+//
+// Returns 0 if 'pic' is NULL or if the crop/rescale call fails, 1 otherwise.
+int CropOrScale(WebPPicture* const pic, const CropOrScaleParams& params) {
+  if (pic == NULL) return 0;
+#if !defined(WEBP_REDUCE_SIZE)
+  if (params.alter_input) {
+    if (params.crop_or_scale) {
+      // Keep at least one pixel in each dimension.
+      const int cropped_width = std::max(1, pic->width / params.width_ratio);
+      const int cropped_height = std::max(1, pic->height / params.height_ratio);
+      const int cropped_left = (pic->width - cropped_width) / params.left_ratio;
+      const int cropped_top = (pic->height - cropped_height) / params.top_ratio;
+      return WebPPictureCrop(pic, cropped_left, cropped_top, cropped_width,
+                             cropped_height);
+    } else {
+      // The ratios are expressed in eighths of the source dimensions.
+      const int scaled_width = 1 + (pic->width * params.width_ratio) / 8;
+      const int scaled_height = 1 + (pic->height * params.height_ratio) / 8;
+      return WebPPictureRescale(pic, scaled_width, scaled_height);
+    }
+  }
+#else   // defined(WEBP_REDUCE_SIZE)
+  // Only 'params' is unused in this configuration.
+  (void)params;
+#endif  // !defined(WEBP_REDUCE_SIZE)
+  return 1;
+}
+
+extern "C" VP8CPUInfo VP8GetCPUInfo;
+static VP8CPUInfo GetCPUInfo;
+
+// Reports SSE4.1 and AVX as unavailable; other features are forwarded to the
+// saved GetCPUInfo callback.
+static WEBP_INLINE int GetCPUInfoNoSSE41(CPUFeature feature) {
+  switch (feature) {
+    case kSSE4_1:
+    case kAVX:
+      return 0;
+    default:
+      return GetCPUInfo(feature);
+  }
+}
+
+// Reports AVX as unavailable; other features are forwarded to the saved
+// GetCPUInfo callback.
+static WEBP_INLINE int GetCPUInfoNoAVX(CPUFeature feature) {
+  return (feature == kAVX) ? 0 : GetCPUInfo(feature);
+}
+
+// Reports kSlowSSSE3 as available whenever the saved GetCPUInfo callback
+// reports SSE3; every other query is forwarded unchanged.
+static WEBP_INLINE int GetCPUInfoForceSlowSSSE3(CPUFeature feature) {
+  // we have SSE3 -> force SlowSSSE3
+  const bool force_slow_ssse3 =
+      (feature == kSlowSSSE3) && (GetCPUInfo(kSSE3) != 0);
+  return force_slow_ssse3 ? 1 : GetCPUInfo(feature);
+}
+
+// Reports every CPU feature as unavailable.
+static WEBP_INLINE int GetCPUInfoOnlyC(CPUFeature feature) {
+  static_cast<void>(feature);  // The answer does not depend on the feature.
+  return 0;
+}
+
+// Installs in VP8GetCPUInfo the callback selected by 'index', which must be
+// at most kMaxOptimizationIndex. 'default_VP8GetCPUInfo' is saved in
+// GetCPUInfo, which the restricting wrappers forward to.
+void SetOptimization(VP8CPUInfo default_VP8GetCPUInfo, uint32_t index) {
+  assert(index <= kMaxOptimizationIndex);
+  GetCPUInfo = default_VP8GetCPUInfo;
+  const VP8CPUInfo kCandidates[kMaxOptimizationIndex + 1] = {
+      GetCPUInfoOnlyC,           // 0
+      GetCPUInfoForceSlowSSSE3,  // 1
+      GetCPUInfoNoSSE41,         // 2
+      GetCPUInfoNoAVX,           // 3
+      default_VP8GetCPUInfo,     // 4: same value as GetCPUInfo at this point
+  };
+  VP8GetCPUInfo = kCandidates[index];
+}
+
+//------------------------------------------------------------------------------
+
+// Wraps fuzztest::ReadFilesFromDirectory(), unpacking each single-element
+// tuple it returns into a plain string. Order is preserved.
+std::vector<std::string> ReadFilesFromDirectory(std::string_view dir) {
+  std::vector<std::tuple<std::string>> entries =
+      fuzztest::ReadFilesFromDirectory(dir);
+  std::vector<std::string> contents;
+  contents.reserve(entries.size());
+  for (std::tuple<std::string>& entry : entries) {
+    // 'entries' is discarded afterwards, so its strings can be moved out.
+    contents.push_back(std::move(std::get<0>(entry)));
+  }
+  return contents;
+}
+
+//------------------------------------------------------------------------------
+// The code in this section is copied from
+// https://github.com/webmproject/sjpeg/blob/
+//                1c025b3dbc2246de3e1d7c287970f1a01291800f/src/jpeg_tools.cc#L47
+// (same license as this file).
+
+namespace {
+// Constants below are marker codes defined in JPEG spec
+// ISO/IEC 10918-1 : 1993(E) Table B.1
+// See also: http://www.w3.org/Graphics/JPEG/itu-t81.pdf
+
+#define M_SOF0 0xffc0
+#define M_SOF1 0xffc1
+
+// Scans a JPEG byte stream for the first SOF0 or SOF1 marker.
+// Returns a pointer to the marker, or NULL if 'src' is NULL, the buffer is
+// shorter than 8 bytes, or no such marker is found.
+const uint8_t* GetSOFData(const uint8_t* src, int size) {
+  // With fewer than 8 bytes, 'end' below would point before the buffer.
+  if (src == NULL || size < 8) return NULL;
+  const uint8_t* const end = src + size - 8;  // 8 bytes of safety, for marker
+  src += 2;                                   // skip M_SOI
+  for (; src < end && *src != 0xff; ++src) {  /* search first 0xff marker */
+  }
+  while (src < end) {
+    const uint32_t marker = static_cast<uint32_t>((src[0] << 8) | src[1]);
+    if (marker == M_SOF0 || marker == M_SOF1) return src;
+    const size_t s = 2 + ((src[2] << 8) | src[3]);
+    // Stop rather than move 'src' to or past 'end': the loop would exit
+    // anyway, and the pointer never leaves the buffer this way.
+    if (s >= static_cast<size_t>(end - src)) break;
+    src += s;
+  }
+  return NULL;  // No SOF marker found
+}
+
+// Extracts the dimensions stored in the SOF0/SOF1 segment of a JPEG stream.
+//
+// 'width' and 'height' must not be NULL. 'is_yuv420' may be NULL; otherwise
+// it is set to non-zero only if there are 3 components whose sampling bytes
+// are 0x22, 0x11 and 0x11.
+//
+// Returns false if an output pointer is NULL, no SOF marker is found, or the
+// data after the marker is too short. Note that 'width' and 'height' may
+// already have been written when false is returned for a short component
+// list.
+bool SjpegDimensions(const uint8_t* src0, size_t size, int* width, int* height,
+                     int* is_yuv420) {
+  if (width == NULL || height == NULL) return false;
+  const uint8_t* src = GetSOFData(src0, size);
+  // Check for NULL before using 'src' in pointer arithmetic.
+  if (src == NULL) return false;
+  const size_t left_over = size - (src - src0);
+  if (left_over < 8 + 3 * 1) return false;
+  *height = (src[5] << 8) | src[6];
+  *width = (src[7] << 8) | src[8];
+  if (is_yuv420 != NULL) {
+    const size_t nb_comps = src[9];
+    *is_yuv420 = (nb_comps == 3);
+    if (left_over < 11 + 3 * nb_comps) return false;
+    for (int c = 0; *is_yuv420 && c < 3; ++c) {
+      const int expected_dim = (c == 0 ? 0x22 : 0x11);
+      *is_yuv420 &= (src[11 + c * 3] == expected_dim);
+    }
+  }
+  return true;
+}
+}  // namespace
+
+//------------------------------------------------------------------------------
+
+// Returns true if 'data' parses as a JPEG or WebP whose dimensions, rounded
+// up to multiples of 8, cover more than kFuzzPxLimit pixels. Returns false
+// when the dimensions cannot be read.
+bool IsImageTooBig(const uint8_t* data, size_t size) {
+  int width, height, components;
+  const bool dimensions_known =
+      SjpegDimensions(data, size, &width, &height, &components) ||
+      WebPGetInfo(data, size, &width, &height);
+  if (!dimensions_known) return false;
+
+  // Look at the number of 8x8px blocks rather than the overall pixel count
+  // when comparing to memory and duration thresholds.
+  const size_t padded_width = (static_cast<size_t>(width) + 7) / 8 * 8;
+  const size_t padded_height = (static_cast<size_t>(height) + 7) / 8 * 8;
+  // Threshold to avoid out-of-memory and timeout issues.
+  // The threshold is arbitrary but below the fuzzer limit of 2 GB.
+  // The value cannot be 2 GB because of the added memory by MSAN.
+  return padded_width * padded_height > kFuzzPxLimit;
+}
+
+}  // namespace fuzz_utils
--- a/tests/fuzzer/fuzz_utils.h
+++ b/tests/fuzzer/fuzz_utils.h
@ -1,4 +1,4 @@
-// Copyright 2018 Google Inc.
+// Copyright 2018-2024 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -17,14 +17,23 @@
 #ifndef WEBP_TESTS_FUZZER_FUZZ_UTILS_H_
 #define WEBP_TESTS_FUZZER_FUZZ_UTILS_H_

-#include <stdint.h>
-#include <stdlib.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>

 #include "./img_alpha.h"
 #include "./img_grid.h"
 #include "./img_peak.h"
-#include "src/dsp/dsp.h"
+#include "src/dsp/cpu.h"
 #include "src/webp/encode.h"
+#include "fuzztest/fuzztest.h"
+
+namespace fuzz_utils {

 //------------------------------------------------------------------------------
 // Arbitrary limits to prevent OOM, timeout, or slow execution.
@ -54,170 +63,139 @@ static WEBP_INLINE uint8_t FuzzHash(const uint8_t* const data, size_t size) {
  return value;
 }

-//------------------------------------------------------------------------------
-// Extract an integer in [0, max_value].
-
-static WEBP_INLINE uint32_t Extract(uint32_t max_value,
-                                    const uint8_t data[], size_t size,
-                                    uint32_t* const bit_pos) {
-  uint32_t v = 0;
-  uint32_t range = 1;
-  while (*bit_pos < 8 * size && range <= max_value) {
-    const uint8_t mask = 1u << (*bit_pos & 7);
-    v = (v << 1) | !!(data[*bit_pos >> 3] & mask);
-    range <<= 1;
-    ++*bit_pos;
-  }
-  return v % (max_value + 1);
-}
-
-//------------------------------------------------------------------------------
-// Some functions to override VP8GetCPUInfo and disable some optimizations.
-
 #ifdef __cplusplus
 extern "C" VP8CPUInfo VP8GetCPUInfo;
 #else
 extern VP8CPUInfo VP8GetCPUInfo;
 #endif
-static VP8CPUInfo GetCPUInfo;
-
-static WEBP_INLINE int GetCPUInfoNoSSE41(CPUFeature feature) {
-  if (feature == kSSE4_1 || feature == kAVX) return 0;
-  return GetCPUInfo(feature);
-}
-
-static WEBP_INLINE int GetCPUInfoNoAVX(CPUFeature feature) {
-  if (feature == kAVX) return 0;
-  return GetCPUInfo(feature);
-}
-
-static WEBP_INLINE int GetCPUInfoForceSlowSSSE3(CPUFeature feature) {
-  if (feature == kSlowSSSE3 && GetCPUInfo(kSSE3)) {
-    return 1;  // we have SSE3 -> force SlowSSSE3
-  }
-  return GetCPUInfo(feature);
-}
-
-static WEBP_INLINE int GetCPUInfoOnlyC(CPUFeature feature) {
-  (void)feature;
-  return 0;
-}
-
-static WEBP_INLINE void ExtractAndDisableOptimizations(
-    VP8CPUInfo default_VP8GetCPUInfo, const uint8_t data[], size_t size,
-    uint32_t* const bit_pos) {
-  GetCPUInfo = default_VP8GetCPUInfo;
-  const VP8CPUInfo kVP8CPUInfos[5] = {GetCPUInfoOnlyC, GetCPUInfoForceSlowSSSE3,
-                                      GetCPUInfoNoSSE41, GetCPUInfoNoAVX,
-                                      GetCPUInfo};
-  int VP8GetCPUInfo_index = Extract(4, data, size, bit_pos);
-  VP8GetCPUInfo = kVP8CPUInfos[VP8GetCPUInfo_index];
-}

 //------------------------------------------------------------------------------

-static WEBP_INLINE int ExtractWebPConfig(WebPConfig* const config,
-                                         const uint8_t data[], size_t size,
-                                         uint32_t* const bit_pos) {
-  if (config == NULL || !WebPConfigInit(config)) return 0;
-  config->lossless = Extract(1, data, size, bit_pos);
-  config->quality = Extract(100, data, size, bit_pos);
-  config->method = Extract(6, data, size, bit_pos);
-  config->image_hint =
-      (WebPImageHint)Extract(WEBP_HINT_LAST - 1, data, size, bit_pos);
-  config->segments = 1 + Extract(3, data, size, bit_pos);
-  config->sns_strength = Extract(100, data, size, bit_pos);
-  config->filter_strength = Extract(100, data, size, bit_pos);
-  config->filter_sharpness = Extract(7, data, size, bit_pos);
-  config->filter_type = Extract(1, data, size, bit_pos);
-  config->autofilter = Extract(1, data, size, bit_pos);
-  config->alpha_compression = Extract(1, data, size, bit_pos);
-  config->alpha_filtering = Extract(2, data, size, bit_pos);
-  config->alpha_quality = Extract(100, data, size, bit_pos);
-  config->pass = 1 + Extract(9, data, size, bit_pos);
-  config->show_compressed = 1;
-  config->preprocessing = Extract(2, data, size, bit_pos);
-  config->partitions = Extract(3, data, size, bit_pos);
-  config->partition_limit = 10 * Extract(10, data, size, bit_pos);
-  config->emulate_jpeg_size = Extract(1, data, size, bit_pos);
-  config->thread_level = Extract(1, data, size, bit_pos);
-  config->low_memory = Extract(1, data, size, bit_pos);
-  config->near_lossless = 20 * Extract(5, data, size, bit_pos);
-  config->exact = Extract(1, data, size, bit_pos);
-  config->use_delta_palette = Extract(1, data, size, bit_pos);
-  config->use_sharp_yuv = Extract(1, data, size, bit_pos);
-  return WebPValidateConfig(config);
+constexpr const uint8_t* kImagesData[] = {kImgAlphaData, kImgGridData,
+                                          kImgPeakData};
+constexpr size_t kNumSourceImages =
+    sizeof(kImagesData) / sizeof(kImagesData[0]);
+
+WebPPicture GetSourcePicture(int image_index, bool use_argb);
+
+// Returns a FuzzTest domain producing WebPConfig values.
+// Each field is drawn from the integer range given in the matching domain
+// below, except show_compressed, which is always 1. The mapping aborts if
+// WebPConfigInit() or WebPValidateConfig() fails.
+static inline auto ArbitraryWebPConfig() {
+  return fuzztest::Map(
+      [](int lossless, int quality, int method, int image_hint, int segments,
+         int sns_strength, int filter_strength, int filter_sharpness,
+         int filter_type, int autofilter, int alpha_compression,
+         int alpha_filtering, int alpha_quality, int pass, int preprocessing,
+         int partitions, int partition_limit, int emulate_jpeg_size,
+         int thread_level, int low_memory, int near_lossless, int exact,
+         int use_delta_palette, int use_sharp_yuv) -> WebPConfig {
+        WebPConfig config;
+        if (!WebPConfigInit(&config)) abort();
+        config.lossless = lossless;
+        config.quality = quality;
+        config.method = method;
+        config.image_hint = (WebPImageHint)image_hint;
+        config.segments = segments;
+        config.sns_strength = sns_strength;
+        config.filter_strength = filter_strength;
+        config.filter_sharpness = filter_sharpness;
+        config.filter_type = filter_type;
+        config.autofilter = autofilter;
+        config.alpha_compression = alpha_compression;
+        config.alpha_filtering = alpha_filtering;
+        config.alpha_quality = alpha_quality;
+        config.pass = pass;
+        config.show_compressed = 1;
+        config.preprocessing = preprocessing;
+        config.partitions = partitions;
+        // The domain is [0, 10]; the stored value is a multiple of 10 in
+        // [0, 100].
+        config.partition_limit = 10 * partition_limit;
+        config.emulate_jpeg_size = emulate_jpeg_size;
+        config.thread_level = thread_level;
+        config.low_memory = low_memory;
+        // The domain is [0, 5]; the stored value is a multiple of 20 in
+        // [0, 100].
+        config.near_lossless = 20 * near_lossless;
+        config.exact = exact;
+        config.use_delta_palette = use_delta_palette;
+        config.use_sharp_yuv = use_sharp_yuv;
+        if (!WebPValidateConfig(&config)) abort();
+        return config;
+      },
+      // One domain per lambda parameter, in the same order.
+      /*lossless=*/fuzztest::InRange<int>(0, 1),
+      /*quality=*/fuzztest::InRange<int>(0, 100),
+      /*method=*/fuzztest::InRange<int>(0, 6),
+      /*image_hint=*/fuzztest::InRange<int>(0, WEBP_HINT_LAST - 1),
+      /*segments=*/fuzztest::InRange<int>(1, 4),
+      /*sns_strength=*/fuzztest::InRange<int>(0, 100),
+      /*filter_strength=*/fuzztest::InRange<int>(0, 100),
+      /*filter_sharpness=*/fuzztest::InRange<int>(0, 7),
+      /*filter_type=*/fuzztest::InRange<int>(0, 1),
+      /*autofilter=*/fuzztest::InRange<int>(0, 1),
+      /*alpha_compression=*/fuzztest::InRange<int>(0, 1),
+      /*alpha_filtering=*/fuzztest::InRange<int>(0, 2),
+      /*alpha_quality=*/fuzztest::InRange<int>(0, 100),
+      /*pass=*/fuzztest::InRange<int>(1, 10),
+      /*preprocessing=*/fuzztest::InRange<int>(0, 2),
+      /*partitions=*/fuzztest::InRange<int>(0, 3),
+      /*partition_limit=*/fuzztest::InRange<int>(0, 10),
+      /*emulate_jpeg_size=*/fuzztest::InRange<int>(0, 1),
+      /*thread_level=*/fuzztest::InRange<int>(0, 1),
+      /*low_memory=*/fuzztest::InRange<int>(0, 1),
+      /*near_lossless=*/fuzztest::InRange<int>(0, 5),
+      /*exact=*/fuzztest::InRange<int>(0, 1),
+      /*use_delta_palette=*/fuzztest::InRange<int>(0, 1),
+      /*use_sharp_yuv=*/fuzztest::InRange<int>(0, 1));
 }

+// Parameters consumed by CropOrScale().
+struct CropOrScaleParams {
+  // If false, CropOrScale() leaves the picture untouched and the remaining
+  // fields are not read.
+  bool alter_input;
+  // If true the picture is cropped, otherwise it is rescaled.
+  bool crop_or_scale;
+  // Crop: the picture dimension is divided by the ratio (at least 1 pixel is
+  // kept). Rescale: the new dimension is 1 + dimension * ratio / 8.
+  int width_ratio;
+  int height_ratio;
+  // Only read when cropping: the crop offset is the space left after
+  // cropping, divided by the ratio. Used as divisors, so must be non-zero.
+  int left_ratio;
+  int top_ratio;
+};
+
+// Returns a FuzzTest domain producing CropOrScaleParams values.
+//
+// The first optional pair holds the width/height ratios, each in [1, 8]; the
+// input is altered only if it is present. The second optional pair holds the
+// left/top ratios, each in [1, 8]; if it is present (and the input is
+// altered) the picture is cropped, otherwise it is rescaled.
+static inline auto ArbitraryCropOrScaleParams() {
+  return fuzztest::Map(
+      [](const std::optional<std::pair<int, int>>& width_height_ratio,
+         const std::optional<std::pair<int, int>>& left_top_ratio)
+          -> CropOrScaleParams {
+        // Give every field a defined value first: the fields below are only
+        // assigned when the matching optional is present, yet the struct is
+        // always returned by value. All ratios default to 1.
+        CropOrScaleParams params = {/*alter_input=*/false,
+                                    /*crop_or_scale=*/false,
+                                    /*width_ratio=*/1,
+                                    /*height_ratio=*/1,
+                                    /*left_ratio=*/1,
+                                    /*top_ratio=*/1};
+        params.alter_input = width_height_ratio.has_value();
+        if (params.alter_input) {
+          params.width_ratio = width_height_ratio->first;
+          params.height_ratio = width_height_ratio->second;
+          params.crop_or_scale = left_top_ratio.has_value();
+          if (params.crop_or_scale) {
+            params.left_ratio = left_top_ratio->first;
+            params.top_ratio = left_top_ratio->second;
+          }
+        }
+        return params;
+      },
+      fuzztest::OptionalOf(
+          fuzztest::PairOf(fuzztest::InRange(1, 8), fuzztest::InRange(1, 8))),
+      fuzztest::OptionalOf(
+          fuzztest::PairOf(fuzztest::InRange(1, 8), fuzztest::InRange(1, 8))));
+}
+
+// Crops or scales a picture according to the given params.
+int CropOrScale(WebPPicture* pic, const CropOrScaleParams& params);
+
+// Imposes a level of optimization among one of the kMaxOptimizationIndex+1
+// possible values: OnlyC, ForceSlowSSSE3, NoSSE41, NoAVX, default.
+static constexpr uint32_t kMaxOptimizationIndex = 4;
+void SetOptimization(VP8CPUInfo default_VP8GetCPUInfo, uint32_t index);
+
 //------------------------------------------------------------------------------

-static WEBP_INLINE int ExtractSourcePicture(WebPPicture* const pic,
-                                            const uint8_t data[], size_t size,
-                                            uint32_t* const bit_pos) {
-  if (pic == NULL) return 0;
+// See https://developers.google.com/speed/webp/docs/riff_container.
+static constexpr size_t kMaxWebPFileSize = (1ull << 32) - 2;  // 4 GiB - 2

-  // Pick a source picture.
-  const uint8_t* kImagesData[] = {
-      kImgAlphaData,
-      kImgGridData,
-      kImgPeakData
-  };
-  const int kImagesWidth[] = {
-      kImgAlphaWidth,
-      kImgGridWidth,
-      kImgPeakWidth
-  };
-  const int kImagesHeight[] = {
-      kImgAlphaHeight,
-      kImgGridHeight,
-      kImgPeakHeight
-  };
-  const size_t kNbImages = sizeof(kImagesData) / sizeof(kImagesData[0]);
-  const size_t image_index = Extract(kNbImages - 1, data, size, bit_pos);
-  const uint8_t* const image_data = kImagesData[image_index];
-  pic->width = kImagesWidth[image_index];
-  pic->height = kImagesHeight[image_index];
-  pic->argb_stride = pic->width * 4 * sizeof(uint8_t);
+std::vector<std::string> GetDictionaryFromFiles(
+    const std::vector<std::string_view>& file_paths);

-  // Read the bytes.
-  return WebPPictureImportRGBA(pic, image_data, pic->argb_stride);
-}
+// Checks whether the binary blob containing a JPEG or WebP is too big for the
+// fuzzer.
+bool IsImageTooBig(const uint8_t* data, size_t size);

-//------------------------------------------------------------------------------
-
-static WEBP_INLINE int Max(int a, int b) { return ((a < b) ? b : a); }
-
-static WEBP_INLINE int ExtractAndCropOrScale(WebPPicture* const pic,
-                                             const uint8_t data[], size_t size,
-                                             uint32_t* const bit_pos) {
-  if (pic == NULL) return 0;
-#if !defined(WEBP_REDUCE_SIZE)
-  const int alter_input = Extract(1, data, size, bit_pos);
-  const int crop_or_scale = Extract(1, data, size, bit_pos);
-  const int width_ratio = 1 + Extract(7, data, size, bit_pos);
-  const int height_ratio = 1 + Extract(7, data, size, bit_pos);
-  if (alter_input) {
-    if (crop_or_scale) {
-      const uint32_t left_ratio = 1 + Extract(7, data, size, bit_pos);
-      const uint32_t top_ratio = 1 + Extract(7, data, size, bit_pos);
-      const int cropped_width = Max(1, pic->width / width_ratio);
-      const int cropped_height = Max(1, pic->height / height_ratio);
-      const int cropped_left = (pic->width - cropped_width) / left_ratio;
-      const int cropped_top = (pic->height - cropped_height) / top_ratio;
-      return WebPPictureCrop(pic, cropped_left, cropped_top, cropped_width,
-                             cropped_height);
-    } else {
-      const int scaled_width = 1 + (pic->width * width_ratio) / 8;
-      const int scaled_height = 1 + (pic->height * height_ratio) / 8;
-      return WebPPictureRescale(pic, scaled_width, scaled_height);
-    }
-  }
-#else   // defined(WEBP_REDUCE_SIZE)
-  (void)data;
-  (void)size;
-  (void)bit_pos;
-#endif  // !defined(WEBP_REDUCE_SIZE)
-  return 1;
-}
+}  // namespace fuzz_utils

 #endif  // WEBP_TESTS_FUZZER_FUZZ_UTILS_H_
--- a/tests/fuzzer/huffman_fuzzer.cc
+++ b/tests/fuzzer/huffman_fuzzer.cc
@ -14,22 +14,29 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

-#include <stdint.h>
-#include <string.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string_view>

+#include "./fuzz_utils.h"
 #include "src/dec/vp8li_dec.h"
 #include "src/utils/bit_reader_utils.h"
 #include "src/utils/huffman_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/format_constants.h"

-int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
+namespace {
+
+void HuffmanTest(std::string_view blob) {
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(blob.data());
+  const size_t size = blob.size();
+
  // Number of bits to initialize data.
  static const int kColorCacheBitsBits = 4;
  // 'num_htree_groups' is contained in the RG channel, hence 16 bits.
  static const int kNumHtreeGroupsBits = 16;
  if (size * sizeof(*data) < kColorCacheBitsBits + kNumHtreeGroupsBits) {
-    return 0;
+    return;
  }

  // A non-NULL mapping brings minor changes that are tested by the normal
@ -39,27 +46,32 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
  memset(&huffman_tables, 0, sizeof(huffman_tables));
  HTreeGroup* htree_groups = NULL;

+  int num_htree_groups, num_htree_groups_max, color_cache_bits;
+  VP8LBitReader* br;
  VP8LDecoder* dec = VP8LNew();
  if (dec == NULL) goto Error;
-  VP8LBitReader* const br = &dec->br_;
+  br = &dec->br_;
  VP8LInitBitReader(br, data, size);

-  const int color_cache_bits = VP8LReadBits(br, kColorCacheBitsBits);
+  color_cache_bits = VP8LReadBits(br, kColorCacheBitsBits);
  if (color_cache_bits < 1 || color_cache_bits > MAX_CACHE_BITS) goto Error;

-  const int num_htree_groups = VP8LReadBits(br, kNumHtreeGroupsBits);
+  num_htree_groups = VP8LReadBits(br, kNumHtreeGroupsBits);
  // 'num_htree_groups' cannot be 0 as it is built from a non-empty image.
  if (num_htree_groups == 0) goto Error;
  // This variable is only useful when mapping is not NULL.
-  const int num_htree_groups_max = num_htree_groups;
+  num_htree_groups_max = num_htree_groups;
  (void)ReadHuffmanCodesHelper(color_cache_bits, num_htree_groups,
                               num_htree_groups_max, mapping, dec,
                               &huffman_tables, &htree_groups);

- Error:
+Error:
  WebPSafeFree(mapping);
  VP8LHtreeGroupsFree(htree_groups);
  VP8LHuffmanTablesDeallocate(&huffman_tables);
  VP8LDelete(dec);
-  return 0;
 }
+
+}  // namespace
+
+// Registers HuffmanTest with fuzztest; the single argument is drawn from
+// fuzztest::String(), i.e. an arbitrary byte string.
+FUZZ_TEST(Huffman, HuffmanTest).WithDomains(fuzztest::String());
--- a/tests/fuzzer/imageio_fuzzer.cc
+++ b/tests/fuzzer/imageio_fuzzer.cc
@ -0,0 +1,75 @@
+// Copyright 2024 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+// Fuzzing of libwebp's image readers
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <string_view>
+
+#include "imageio/image_dec.h"
+#include "imageio/metadata.h"
+#include "src/webp/encode.h"
+#include "tests/fuzzer/fuzz_utils.h"
+
+namespace {
+
+// Feeds the 'size' bytes at 'data' to 'reader' and releases everything it
+// produced.
+//   keep_alpha: forwarded to the reader as 0/1.
+//   use_argb:   stored in WebPPicture::use_argb (as 0/1) before reading.
+// The reader is skipped when fuzz_utils::IsImageTooBig() rejects the input.
+// The reader's return value is deliberately ignored: only crashes and
+// sanitizer reports matter here.
+void TestReader(const uint8_t *data, size_t size, WebPImageReader reader,
+                bool keep_alpha, bool use_argb) {
+  WebPPicture pic;
+  if (!WebPPictureInit(&pic)) {
+    // Failing to initialize the picture is not an input problem: make it a
+    // hard failure of the test itself.
+    std::cerr << "WebPPictureInit failed" << std::endl;
+    std::abort();
+  }
+  Metadata metadata;
+  MetadataInit(&metadata);
+  pic.use_argb = use_argb ? 1 : 0;
+
+  if (!fuzz_utils::IsImageTooBig(data, size)) {
+    (void)(*reader)(data, size, &pic, keep_alpha ? 1 : 0, &metadata);
+  }
+  // Always release, whether or not the reader ran or succeeded.
+  WebPPictureFree(&pic);
+  MetadataFree(&metadata);
+}
+
+// Sentinel format value: ask Decode() to guess the reader from the bytes
+// instead of using a fixed one.
+constexpr WebPInputFileFormat kUnknown = WEBP_UNSUPPORTED_FORMAT;
+
+// Fuzz entry point: decodes 'arbitrary_bytes' with the image reader matching
+// 'format', or with the reader returned by WebPGuessImageReader() when
+// 'format' is kUnknown.
+void Decode(std::string_view arbitrary_bytes, WebPInputFileFormat format,
+            bool keep_alpha, bool use_argb) {
+  const uint8_t *data =
+      reinterpret_cast<const uint8_t *>(arbitrary_bytes.data());
+  const size_t size = arbitrary_bytes.size();
+  if (format == kUnknown) {
+    (void)WebPGuessImageType(data, size);  // shouldn't fail
+    TestReader(data, size, WebPGuessImageReader(data, size), keep_alpha,
+               use_argb);
+  } else {
+    TestReader(data, size, WebPGetImageReader(format), keep_alpha, use_argb);
+  }
+}
+
+// Domains, in Decode() parameter order: input bytes (at most
+// fuzz_utils::kMaxWebPFileSize + 1 of them), reader format (kUnknown selects
+// auto-detection), keep_alpha, use_argb.
+FUZZ_TEST(ImageIOSuite, Decode)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1),
+        fuzztest::ElementOf<WebPInputFileFormat>(
+            {WEBP_PNG_FORMAT, WEBP_JPEG_FORMAT, WEBP_TIFF_FORMAT,
+             WEBP_WEBP_FORMAT, WEBP_PNM_FORMAT, kUnknown}),
+        /*keep_alpha=*/fuzztest::Arbitrary<bool>(),
+        /*use_argb=*/fuzztest::Arbitrary<bool>());
+
+}  // namespace
--- a/tests/fuzzer/makefile.unix
+++ b/tests/fuzzer/makefile.unix
@ -1,31 +0,0 @@
-# This Makefile will compile all fuzzing targets. It doesn't check tool
-# requirements and paths may need to be updated depending on your environment.
-# Note a clang 6+ toolchain is assumed for use of -fsanitize=fuzzer.
-
-CC = clang
-CXX = clang++
-CFLAGS = -fsanitize=fuzzer -I../../src -I../.. -Wall -Wextra
-CXXFLAGS = $(CFLAGS)
-LDFLAGS = -fsanitize=fuzzer
-LDLIBS = ../../src/mux/libwebpmux.a ../../src/demux/libwebpdemux.a
-LDLIBS += ../../src/libwebp.a ../../imageio/libimageio_util.a
-LDLIBS += ../../sharpyuv/libsharpyuv.a
-
-FUZZERS = advanced_api_fuzzer animation_api_fuzzer animdecoder_fuzzer
-FUZZERS += animencoder_fuzzer enc_dec_fuzzer huffman_fuzzer
-FUZZERS += mux_demux_api_fuzzer simple_api_fuzzer
-
-%.o: fuzz_utils.h img_alpha.h img_grid.h img_peak.h
-all: $(FUZZERS)
-
-define FUZZER_template
-$(1): $$(addsuffix .o, $(1)) $(LDLIBS)
-OBJS += $$(addsuffix .o, $(1))
-endef
-
-$(foreach fuzzer, $(FUZZERS), $(eval $(call FUZZER_template, $(fuzzer))))
-
-clean:
-	$(RM) $(FUZZERS) $(OBJS)
-
-.PHONY: all clean
--- a/tests/fuzzer/mux_demux_api_fuzzer.cc
+++ b/tests/fuzzer/mux_demux_api_fuzzer.cc
@ -14,23 +14,30 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
 #include "./fuzz_utils.h"
 #include "src/webp/demux.h"
 #include "src/webp/mux.h"

-int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
+namespace {
+
+void MuxDemuxApiTest(std::string_view data_in, bool mux) {
+  const size_t size = data_in.size();
  WebPData webp_data;
  WebPDataInit(&webp_data);
  webp_data.size = size;
-  webp_data.bytes = data;
+  webp_data.bytes = reinterpret_cast<const uint8_t*>(data_in.data());

  // Extracted chunks and frames are not processed or decoded,
  // which is already covered extensively by the other fuzz targets.

-  if (size & 1) {
+  if (mux) {
    // Mux API
    WebPMux* mux = WebPMuxCreate(&webp_data, size & 2);
-    if (!mux) return 0;
+    if (!mux) return;

    WebPData chunk;
    (void)WebPMuxGetChunk(mux, "EXIF", &chunk);
@ -45,7 +52,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {

    WebPMuxError status;
    WebPMuxFrameInfo info;
-    for (int i = 0; i < kFuzzFrameLimit; i++) {
+    for (int i = 0; i < fuzz_utils::kFuzzFrameLimit; i++) {
      status = WebPMuxGetFrame(mux, i + 1, &info);
      if (status == WEBP_MUX_NOT_FOUND) {
        break;
@ -63,11 +70,11 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
      demux = WebPDemuxPartial(&webp_data, &state);
      if (state < WEBP_DEMUX_PARSED_HEADER) {
        WebPDemuxDelete(demux);
-        return 0;
+        return;
      }
    } else {
      demux = WebPDemux(&webp_data);
-      if (!demux) return 0;
+      if (!demux) return;
    }

    WebPChunkIterator chunk_iter;
@ -83,7 +90,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {

    WebPIterator iter;
    if (WebPDemuxGetFrame(demux, 1, &iter)) {
-      for (int i = 1; i < kFuzzFrameLimit; i++) {
+      for (int i = 1; i < fuzz_utils::kFuzzFrameLimit; i++) {
        if (!WebPDemuxNextFrame(&iter)) break;
      }
    }
@ -91,6 +98,12 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
    WebPDemuxReleaseIterator(&iter);
    WebPDemuxDelete(demux);
  }
-
-  return 0;
 }
+
+}  // namespace
+
+// Registers MuxDemuxApiTest with fuzztest. The input is capped at
+// fuzz_utils::kMaxWebPFileSize + 1 bytes; the boolean is passed as the 'mux'
+// parameter, which gates the "Mux API" branch of the test.
+FUZZ_TEST(MuxDemuxApi, MuxDemuxApiTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1),
+        /*mux=*/fuzztest::Arbitrary<bool>());
--- a/tests/fuzzer/oss-fuzz/build.sh
+++ b/tests/fuzzer/oss-fuzz/build.sh
@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright 2018 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# This script is meant to be run by the oss-fuzz infrastructure from the script
+# https://github.com/google/oss-fuzz/blob/master/projects/libwebp/build.sh
+# It builds the different fuzz targets.
+# Only the libfuzzer engine is supported.
+
+# To test changes to this file:
+# - make changes and commit to your REPO
+# - run:
+#     git clone --depth=1 git@github.com:google/oss-fuzz.git
+#     cd oss-fuzz
+# - modify projects/libwebp/Dockerfile to point to your REPO
+# - run:
+#     python3 infra/helper.py build_image libwebp
+#     # enter 'y' and wait for everything to be downloaded
+# - run:
+#     python3 infra/helper.py build_fuzzers --sanitizer address libwebp
+#     # wait for the tests to be built
+# And then run the fuzzer locally, for example:
+#     python3 infra/helper.py run_fuzzer libwebp \
+#     --sanitizer address \
+#     animencoder_fuzzer@AnimEncoder.AnimEncoderTest
+
+set -eu
+
+EXTRA_CMAKE_FLAGS=""
+export CXXFLAGS="${CXXFLAGS} -DFUZZTEST_COMPATIBILITY_MODE"
+EXTRA_CMAKE_FLAGS="-DFUZZTEST_COMPATIBILITY_MODE=libfuzzer"
+
+# limit allocation size to reduce spurious OOMs
+WEBP_CFLAGS="$CFLAGS -DWEBP_MAX_IMAGE_SIZE=838860800" # 800MiB
+
+export CFLAGS="$WEBP_CFLAGS"
+cmake -S . -B build -DWEBP_BUILD_FUZZTEST=ON ${EXTRA_CMAKE_FLAGS}
+cd build && make -j$(nproc) && cd ..
+
+find $SRC/libwebp-test-data -type f -size -32k -iname "*.webp" \
+  -exec zip -qju fuzz_seed_corpus.zip "{}" \;
+
+# The following is taken from https://github.com/google/oss-fuzz/blob/31ac7244748ea7390015455fb034b1f4eda039d9/infra/base-images/base-builder/compile_fuzztests.sh#L59
+# Iterate the fuzz binaries and list each fuzz entrypoint in the binary. For
+# each entrypoint create a wrapper script that calls into the binaries the
+# given entrypoint as argument.
+# The scripts will be named:
+# {binary_name}@{fuzztest_entrypoint}
+FUZZ_TEST_BINARIES_OUT_PATHS=$(find ./build/tests/fuzzer/ -executable -type f)
+echo "Fuzz binaries: $FUZZ_TEST_BINARIES_OUT_PATHS"
+for fuzz_main_file in $FUZZ_TEST_BINARIES_OUT_PATHS; do
+  FUZZ_TESTS=$($fuzz_main_file --list_fuzz_tests | cut -d ' ' -f 4)
+  cp -f ${fuzz_main_file} $OUT/
+  fuzz_basename=$(basename $fuzz_main_file)
+  chmod -x $OUT/$fuzz_basename
+  for fuzz_entrypoint in $FUZZ_TESTS; do
+    TARGET_FUZZER="${fuzz_basename}@$fuzz_entrypoint"
+    # Write executer script
+    cat << EOF > $OUT/$TARGET_FUZZER
+#!/bin/sh
+# LLVMFuzzerTestOneInput for fuzzer detection.
+this_dir=\$(dirname "\$0")
+export TEST_DATA_DIRS=\$this_dir/corpus
+chmod +x \$this_dir/$fuzz_basename
+\$this_dir/$fuzz_basename --fuzz=$fuzz_entrypoint -- \$@
+chmod -x \$this_dir/$fuzz_basename
+EOF
+    chmod +x $OUT/$TARGET_FUZZER
+  done
+  # Copy data.
+  cp fuzz_seed_corpus.zip $OUT/${fuzz_basename}_seed_corpus.zip
+  cp tests/fuzzer/fuzz.dict $OUT/${fuzz_basename}.dict
+done
--- a/tests/fuzzer/patch.sh
+++ b/tests/fuzzer/patch.sh
@ -0,0 +1,10 @@
+#!/bin/sh
+# Fixes for https://github.com/google/fuzztest/issues/1124
+sed -i -e "s/-fsanitize=address//g" -e "s/-DADDRESS_SANITIZER//g" \
+  ./cmake/FuzzTestFlagSetup.cmake
+# Fixes for https://github.com/google/fuzztest/issues/1125
+before="if (IsEnginePlaceholderInput(data)) return;"
+after="if (data.size() == 0) return;"
+sed -i "s/${before}/${after}/" ./fuzztest/internal/compatibility_mode.cc
+sed -i "s/set(GTEST_HAS_ABSL ON)/set(GTEST_HAS_ABSL OFF)/" \
+  ./cmake/BuildDependencies.cmake
--- a/tests/fuzzer/simple_api_fuzzer.cc
+++ b/tests/fuzzer/simple_api_fuzzer.cc
@ -14,15 +14,23 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
 #include "./fuzz_utils.h"
 #include "src/webp/decode.h"

-int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
-  int w, h;
-  if (!WebPGetInfo(data, size, &w, &h)) return 0;
-  if ((size_t)w * h > kFuzzPxLimit) return 0;
+namespace {

-  const uint8_t value = FuzzHash(data, size);
+void SimpleApiTest(std::string_view data_in) {
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(data_in.data());
+  const size_t size = data_in.size();
+  int w, h;
+  if (!WebPGetInfo(data, size, &w, &h)) return;
+  if ((size_t)w * h > fuzz_utils::kFuzzPxLimit) return;
+
+  const uint8_t value = fuzz_utils::FuzzHash(data, size);
  uint8_t* buf = NULL;

  // For *Into functions, which decode into an external buffer, an
@ -84,6 +92,11 @@ int LLVMFuzzerTestOneInput(const uint8_t* const data, size_t size) {
  }

  if (buf) WebPFree(buf);
-
-  return 0;
 }
+
+}  // namespace
+
+// Registers SimpleApiTest with fuzztest; the input is an arbitrary byte
+// string of at most fuzz_utils::kMaxWebPFileSize + 1 bytes.
+FUZZ_TEST(SimpleApi, SimpleApiTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1));
--- a/tests/fuzzer/webp_info_fuzzer.cc
+++ b/tests/fuzzer/webp_info_fuzzer.cc
@ -0,0 +1,47 @
+// Copyright 2024 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <cstdint>
+#include <string_view>
+
+#include "src/webp/mux_types.h"
+#include "tests/fuzzer/fuzz_utils.h"
+
+// Pull in the webpinfo implementation, renaming its main() to exec_main.
+// Don't do that at home!
+#define main exec_main
+#include "examples/webpinfo.c"
+#undef main
+
+// Fuzz target: runs webpinfo's AnalyzeWebP() on the bytes of `data`, with
+// quiet_ set, the summary and diagnosis flags cleared and parse_bitstream_
+// enabled. The returned status is not checked.
+void WebPInfoTest(std::string_view data) {
+  WebPInfo webp_info;
+  WebPInfoInit(&webp_info);
+  webp_info.quiet_ = 1;
+  webp_info.show_summary_ = 0;
+  webp_info.show_diagnosis_ = 0;
+  webp_info.parse_bitstream_ = 1;
+  WebPData webp_data = {reinterpret_cast<const uint8_t *>(data.data()),
+                        data.size()};
+  AnalyzeWebP(&webp_info, &webp_data);
+}
+
+FUZZ_TEST(WebPInfo, WebPInfoTest)
+    .WithDomains(
+        fuzztest::String()
+            .WithMaxSize(fuzz_utils::kMaxWebPFileSize + 1));
Author	SHA1	Message	Date
James Zern	615e58744f	Merge "make VP8LPredictor[01]_C() static" into main	2024-08-22 17:35:52 +00:00
James Zern	233e86b91f	Merge changes Ie43dc5ef,I94cd8bab into main * changes: Do*Filter_*: remove row & num_rows parameters Do*Filter_C: remove dead 'inverse' code paths	2024-08-19 18:51:06 +00:00
James Zern	1a29fd2fc3	make VP8LPredictor[01]_C() static Only predictors 2-13 are reused in lossless_enc.c. Change-Id: Ia3a7342fccfb44b9ad5297f48d6be2d96af68ec8	2024-08-16 10:58:45 -07:00
James Zern	dd9d3770d7	Do*Filter_*: remove row & num_rows parameters The row parameter became a constant in: `2102ccd` update the Unfilter API in dsp to process one row independently num_rows is always equal to height. Change-Id: Ie43dc5ef222e442ce8c92766da0b9824ccbca236	2024-08-12 19:36:31 -07:00
James Zern	ab451a495c	Do*Filter_C: remove dead 'inverse' code paths The inverse parameter became a constant in: `2102ccd` update the Unfilter API in dsp to process one row independently The row parameter to these functions is in a similar state; it will be removed in a follow up. Change-Id: I94cd8babe0e42474ff794ba5fa29dd48039de5f8	2024-08-08 18:13:48 -07:00
James Zern	f9a480f7c3	{TrueMotion,TM16}_NEON: remove zero extension Replace vmovl_u8 -> s16 + signed vaddq with unsigned vaddw. No change in assembly with clang-16 (armv7 & aarch64) and gcc-13 (aarch64). armv7 gcc-13 had kept the vmovl instructions, those are now gone. Change-Id: Ibb4fbdd5680d3e9dd06933c100528a6f363de472	2024-08-07 16:43:14 -07:00
James Zern	04834acae7	Merge changes I25c30a9e,I0a192fc6,I4cf89575 into main * changes: WASM: Enable VP8L_USE_FAST_LOAD WASM: don't use USE_GENERIC_TREE WASM: Enable 64-bit BITS caching	2024-08-01 18:36:34 +00:00
James Zern	39a602afe6	webp-lossless-bitstream-spec: normalize predictor transform ref Replace the only use of 'prediction transform' with 'predictor transform' which is used throughout the text. Spotted by thakis@. Bug: webp:355607636 Change-Id: Ic4e1f08e2ff94da82824cee24c86a4fa42c7ec28	2024-07-29 19:22:33 -07:00
James Zern	f28c837dc1	Merge "webp-container-spec: align anim pseudocode w/prose" into main	2024-07-29 17:17:16 +00:00
Vincent Rabaud	74be8e22d9	Fix implicit conversion issues Change-Id: If2cc8a137371ef365cf4a9c55f1b6ab131fba564	2024-07-25 22:30:15 +02:00
Vincent Rabaud	0c01db7c3c	Merge "Increase the transform bits if possible." into main	2024-07-19 22:44:13 +00:00
Vincent Rabaud	f2d6dc1eef	Increase the transform bits if possible. This brings minor size improvements because repetitive values in the transform images are easily explainable through LZ77. Still, it makes an upcoming pull request a bit more stable. This is a rollforward of `7ec51c5916` `ee26766a89` Change-Id: I254ab3ccd5053344f89099280e8d994ecd55aee0	2024-07-19 23:22:27 +02:00
James Zern	caa19e5b3a	update link to issue tracker https://bugs.chromium.org/p/webp -> https://issues.webmproject.org Change-Id: I45324210d4e9351238da127efd04b22627548545	2024-07-18 16:49:45 -07:00
James Zern	c9dd9bd40b	webp-container-spec: align anim pseudocode w/prose - add missing 'or application-defined color' when creating the canvas - reference 'dispose_method' before rendering image Bug: webp:646 Change-Id: I25734d1912644f0aa6c87e14b80ed8cd9b180056	2024-07-16 15:42:07 -07:00
wrv	8a7c8dc662	WASM: Enable VP8L_USE_FAST_LOAD It is 2-5% faster to use VP8L fast load on WASM Bug: webp:643 Change-Id: I25c30a9e6bcfc7cadd640122579eeebcb37e6fc0	2024-07-15 14:41:36 -05:00
wrv	f0c53cd966	WASM: don't use USE_GENERIC_TREE It is 2-4% faster to use hard-coded tree on WASM Bug: webp:643 Change-Id: I0a192fc6af210c79814a81084cd1f199714bf46c	2024-07-15 14:41:14 -05:00
wrv	eef903d04a	WASM: Enable 64-bit BITS caching Bug: webp:643 Change-Id: I4cf89575e0ebcfeaf9d84be8e188863657893a07	2024-07-15 14:40:45 -05:00
James Zern	6296cc8d0d	iterator_enc: make VP8IteratorReset() static This function is unused outside of iterator_enc.c. Change-Id: I0f1ecedeb9ed4d9f51d0135f04b8ef00424f24cc	2024-07-12 15:23:10 -07:00
James Zern	fbd93896a6	histogram_enc: make VP8LGetHistogramSize static This function is unused outside of histogram_enc.c. Change-Id: I527f54408383d0bc9d04878ca397a3d044b350de	2024-07-12 15:23:10 -07:00
James Zern	cc7ff5459a	cost_enc: make VP8CalculateLevelCosts[] static This table is unused outside of cost_enc.c. Change-Id: I0aa46554b8470fb09a7ffeae0e98d2356b40b671	2024-07-12 15:23:10 -07:00
James Zern	4e2828bae8	vp8l_dec: make VP8LClear() static This function is unused outside of vp8l_dec.c. Change-Id: I16733a44ea024ca9601c098641a3cd464bed2b53	2024-07-12 15:22:20 -07:00
James Zern	d742b24a88	Intra16Preds_NEON: fix truemotion saturation This needs to be done with signed saturation as the sum may be negative. fixes mismatch with C code after: `3bfb05e3` Add AArch64 Neon implementation of Intra16Preds Change-Id: I017e939d7155cc3489ceb76fc8ad50ac9917f23d	2024-07-11 13:37:06 -07:00
James Zern	c7bb4cb585	Intra4Preds_NEON: fix truemotion saturation This needs to be done with signed saturation as the sum may be negative. fixes mismatch with C code after: `baa93808` Add AArch64 Neon implementation of Intra4Preds Change-Id: I190c3d7f78cfd2c7ae83fb7059de41e307abda36	2024-07-11 13:37:06 -07:00
Vincent Rabaud	952a989b1b	Merge "Remove TODO now that log is using fixed point." into main	2024-07-11 20:11:37 +00:00
Vincent Rabaud	dde11574b0	Remove TODO now that log is using fixed point. Bug: webp:499 Change-Id: I39ab340ec6b5932db7535c6b7f31843c28de8415	2024-07-11 20:11:03 +00:00
Vincent Rabaud	a1ca153d51	Fix hidden myerr in my_error_exit Change-Id: I0e3660eff2f6a20446b6fcd2925312757fda424c	2024-07-11 15:59:57 +02:00
James Zern	3bd9420289	Merge changes Iff6e47ed,I24c67cd5,Id781e761 into main * changes: Use QuantizeBlock_NEON for VP8EncQuantizeBlockWHT on Arm Add AArch64 Neon implementation of Intra16Preds Add AArch64 Neon implementation of Intra4Preds	2024-07-11 02:04:42 +00:00
Vincent Rabaud	d27d246e42	Merge "Convert VP8LFastSLog2 to fixed point" into main	2024-07-10 21:52:39 +00:00
Vincent Rabaud	4838611f91	Disable msg_code use in fuzzing mode bug:oss-fuzz:70102 bug:oss-fuzz:70112 Change-Id: I519c991c784bccb4bbaec622281f81015ab479a4	2024-07-10 22:55:21 +02:00
Istvan Stefan	314a142a34	Use QuantizeBlock_NEON for VP8EncQuantizeBlockWHT on Arm Use the Neon implementation instead of falling back to QuantizeBlock_C. Change-Id: Iff6e47eda353cbaa9766f75040fa63aa34607816	2024-07-10 14:48:38 +01:00
Istvan Stefan	3bfb05e38c	Add AArch64 Neon implementation of Intra16Preds Add a Neon implementation of Intra16Preds for use on 64-bit Arm platforms. (This implementation cannot be used on 32-bit Arm platforms as it makes use of a number of AArch64-only Neon instructions.) Change-Id: I24c67cd54b66307e3924fd332c2795fd7422f082	2024-07-10 14:48:38 +01:00
Istvan Stefan	baa93808d9	Add AArch64 Neon implementation of Intra4Preds Add Neon implementation of Intra4Preds for use on 64-bit Arm platforms. (The same implementation cannot be used for 32-bit Arm platforms as it uses a number of AArch64-only Neon instructions.) Change-Id: Id781e7614f4e8e876dfeecd95cfc85e04611d8c6	2024-07-10 14:48:26 +01:00
Vincent Rabaud	41a5e582c2	Fix errors when compiling code as C++ Change-Id: Iba94e24e764038640f39d61fb2bc9cfb3434cc8f	2024-07-10 10:30:48 +02:00
Vincent Rabaud	fb444b692b	Convert VP8LFastSLog2 to fixed point Speedups: 1% with '-lossless', 2% with '-lossless -q 100 -m6' Change-Id: I1d79ea8e3e9e4bac7bcea4d7cbcc1bd56273988e	2024-07-09 16:42:21 +02:00
Vincent Rabaud	c1c89f5189	Fix WEBP_NODISCARD comment and C++ version Change-Id: I8b94974a46b7ac7d9bce179a48655ba8d9700edf	2024-07-09 14:24:00 +02:00
Vincent Rabaud	66408c2c7c	Switch the histogram_enc.h API to fixed point Speedups: 4% with '-lossless', 8% with '-lossless -q 100 -m6' Change-Id: I8f1c244b290d48132c1edc6a1c9fc3f79fef68ec	2024-07-09 13:39:45 +02:00
Vincent Rabaud	ac1e410ded	Remove leftover tiff dep Change-Id: I880ee0f8d0b80fb0f0e476baaf91fe545195c029	2024-07-05 13:39:23 +02:00
Vincent Rabaud	b78d39571f	Disable TIFF on fuzztest. oss-fuzz does not support dynamic libraries so we stick to static. But libtiff.a has undefined symbols on Ubuntu, so we have to disable TIFF. Change-Id: Ibd1932f5780a0af51b6b398ae6415eed8685a0be	2024-07-05 11:26:12 +02:00
Vincent Rabaud	cff21a7d87	Do not build statically on oss-fuzz. This is to get TIFF support for imageio. Change-Id: I3c9aea4b72661926568dc777644b7a9ddd499726	2024-07-04 15:44:12 +02:00
Vincent Rabaud	6853a8e5ac	Merge "Move more internal fuzzers to public." into main	2024-07-02 13:47:01 +00:00
Vincent Rabaud	9bc09db4b8	Merge "Convert VP8LFastLog2 to fixed point" into main	2024-07-02 09:48:16 +00:00
Vincent Rabaud	0a9f1c19f8	Convert VP8LFastLog2 to fixed point The lossless encoding speed-ups are: - up to 1% with default parameters - up to 4% in cruncher mode: -q 100 -m 6 Change-Id: Id92d4bad0b0a2c28c8aa9ff5280eea5717017f30	2024-07-02 10:29:38 +02:00
Vincent Rabaud	db0cb9c27e	Move more internal fuzzers to public. Change-Id: Idde75f374264666e4c54a17b1606464ad5d00d9c	2024-06-28 16:59:18 +02:00
James Zern	ff2b5b15ae	Merge "advanced_api_fuzzer.cc: use crop dims in OOM check" into main	2024-06-26 18:30:07 +00:00
Vincent Rabaud	c4af79d053	Put 0 at the end of a palette and do not store it. This only applies to kSortedDefault and kMinimizeDelta. Change-Id: I9d4178406ed2ef91c5c55f0a1919cfc6605fedf9	2024-06-25 14:46:05 +02:00
Vincent Rabaud	0ec80aef3d	Delete last references to delta palettization Change-Id: I1f931d3aa587d2ae82895ae7c7f4c94fb82fbfb1	2024-06-25 10:53:43 +02:00
James Zern	96d79f8481	advanced_api_fuzzer.cc: use crop dims in OOM check Apply crop dimensions before calculating scaled width/height to ensure the check against fuzz_utils::kFuzzPxLimit will use the same dimensions as the decoder. Bug: oss-fuzz:69873 Change-Id: Icd0862eadf8575135b6d53376acc79d14733a0e5	2024-06-24 16:35:06 -07:00
Vincent Rabaud	c35c7e0240	Fix huffman fuzzer to not leak. That was obviously intentional to see if the new fuzzer was working :) Bug: oss-fuzz=69825 Change-Id: Ie9465bb6225999fd792b712cc585ca3ee1ec528a	2024-06-24 10:13:16 +02:00
Vincent Rabaud	f2fe8decce	Bump fuzztest dependency. Also do not restrict the oss-fuzz script to libfuzzer: this allows new fuzzer engines to try the script as is. The libfuzzer restriction is done upstream anyway. Bug: oss-fuzz:69508 Change-Id: I15685bc7193bef3b9ccb0e0a30a6262e7bfb6fb9	2024-06-21 18:27:37 +02:00
Vincent Rabaud	9ce982fdf2	Fix fuzz tests to work on oss-fuzz - the tests now build/run/check_build with libfuzzer on oss-fuzz - centipede is removed as it builds/runs but do not check_build (timeout due to arguments not parsed correctly) Bug: oss-fuzz:69508 Change-Id: Id063565fc4cce02fc5e36c7d8499d6de9ff54345	2024-06-20 15:09:57 +02:00
Vincent Rabaud	3ba8af1a33	Do not escape quotes anymore in build.sh cat is used now, not an echo of a "". Change-Id: I36036ff2110f94a99bf1581c6c0d30326161314b	2024-06-11 13:35:44 +02:00
Vincent Rabaud	ea0e121b6a	Allow centipede to be used as a fuzzing engine. fuzztest is compatible with libfuzzer in compatibility mode, and fully compatible with centipede by default. Change-Id: I0c8e636df642dede16d394d678008c5e064094b3	2024-06-10 18:13:37 +02:00
James Zern	27731afd47	make VP8I4ModeOffsets & VP8MakeIntra4Preds static These are unused outside of quant_enc.c. Change-Id: I2c5cd0df28c25f279cd05667b327fea14f3fa50e	2024-06-06 19:07:48 -07:00
James Zern	ddd6245eb7	oss-fuzz/build.sh: use heredoc for script creation Rather than a quoted multi-line echo. Change-Id: Ib51fa5693f2946e2bc991dc66a6b3449e6ee61c0	2024-06-06 10:31:33 -07:00
James Zern	50074930e3	oss-fuzz/build.sh,cosmetics: fix indent Change-Id: I1b9ddb5a531fc829fc7374ecefe31a38ac27e02c	2024-06-06 10:28:13 -07:00
Vincent Rabaud	20e92f7d40	Limit the possible fuzz engines. Change-Id: I8f2fd84bc7175e4e74c4fb418fcc4f5549018ac3	2024-06-06 13:21:09 +02:00
Vincent Rabaud	4f200de591	Switch public fuzz tests to fuzztest. Change-Id: I75afb65058690585bbf2671c27d6a99a87bcaab7	2024-06-05 14:08:27 +02:00
wrv	64186bb36c	Add huffman_fuzzer to .gitignore Change-Id: I1ec33a29beb1a9111e57c85ef67e4c4616908c56	2024-06-01 10:21:54 -05:00
Vincent Rabaud	0905f61c85	Move build script from oss-fuzz repo to here. This requires a change in the oss-fuzz repo similar to https://github.com/AOMediaCodec/libavif/blob/main/tests/oss-fuzz/build.sh Change-Id: I7bcdb52dcec7e6edd926aea93988cd758ef9a854	2024-05-31 22:02:17 +02:00
wrv	e86787586b	Fix link to Javascript documentation Change-Id: Ia65d307570a3dff1fc6894abb42dfaaaaa74f086	2024-05-30 17:06:57 -05:00
wrv	5e5b8f0c95	Fix SSE2 Transform_AC3 function name Change-Id: I5fda3221612beafc3548d2abfa7c1e3f686aaaf0	2024-05-29 21:41:14 -05:00
James Zern	45129ee027	Revert "Check all the rows." This reverts commit `ee26766a89`. This change also reverts the parent. Revert "Increase the transform bits if possible." This reverts commit `7ec51c5916`. These changes result in non-lossless encodes. Bug: oss-fuzz:69231, oss-fuzz:69109, oss-fuzz:69208 Bug: b:341475869, b:342743143 Change-Id: Ia28f558992e0aa6f024af1ff66da52e0a5e26fa3	2024-05-25 11:00:32 -07:00
Vincent Rabaud	ee26766a89	Check all the rows. A 3 by 1 image would not have its 1st and 3rd lines compared at the second iteration. BUG=oss-fuzz:69208 Change-Id: I9213e73995d31907f358310a0b7d5ebb21c1f8b2	2024-05-24 23:11:20 +02:00
Vincent Rabaud	7ec51c5916	Increase the transform bits if possible. This brings minor size improvements because repetitive values in the transform images are easily explainable through LZ77. Still, it makes an upcoming pull request a bit more stable. This is `971a03d820` with a fix to not forget to analyze the end of the line. A const has also been added to match VP8LColorSpaceTransform's signature. Change-Id: Iae03216fef298c7abc96a766f8a799552b05ade5	2024-05-23 14:04:34 +02:00
James Zern	3cd16fd3e2	Revert "Increase the transform bits if possible." This reverts commit `971a03d820`. Reason for revert: This creates non-lossless encodes. Original change's description: > Increase the transform bits if possible. > > This brings minor size improvements because repetitive values in > the transform images are easily explainable through LZ77. Still, > it makes an upcoming pull request a bit more stable. > > Change-Id: I1c7135675cb59b5e27ca960738d74465f10d0deb Bug: oss-fuzz:69109, b:341475869 Change-Id: I3b9f21a5498735eb3681e62fb35bf9f9c2ed4f9f	2024-05-20 22:25:57 +00:00
Vincent Rabaud	971a03d820	Increase the transform bits if possible. This brings minor size improvements because repetitive values in the transform images are easily explainable through LZ77. Still, it makes an upcoming pull request a bit more stable. Change-Id: I1c7135675cb59b5e27ca960738d74465f10d0deb	2024-05-17 15:19:03 +02:00
Vincent Rabaud	1bf198a22b	Allow transform_bits to be different during encoding. The spec allows it but it is currently forced to the same value for simplicity. Change-Id: I26197dbf3342f7a72115cc7f7805c154313a2afb	2024-05-13 16:56:19 +02:00
Vincent Rabaud	1e462ca80e	Define MAX_TRANSFORM_BITS according to the specification. Change-Id: I0d575aa84e143bea56b55deb8f42b44e13dd5f1e	2024-05-07 09:16:02 +02:00
Vincent Rabaud	64d1ec23ac	Use (MIN/NUM)_(TRANSFORM/HUFFMAN)_BITS where appropriate Change-Id: I849ff8864f7abcc723dfe2b7ee0f290c8ee89c3f	2024-05-06 22:46:44 +02:00
Vincent Rabaud	a90160e11a	Refactor histograms in predictors. Replace the 2d histograms with uint32_t 1d versions (to avoid pointer casting and to use the optimized VP8LAddVectorEq). Change-Id: I90b0fe98390b49e3fd03e3484289571cf7ae6eca	2024-05-03 22:09:38 +02:00
Vincent Rabaud	a7aa7525b8	Fix some function declarations - fix some function declarations to match the implementation - fix some consts Change-Id: I8c89f49ec68d3dd1db7f8ee5cac73777f52d2576	2024-05-03 20:10:46 +02:00
James Zern	68ff4e1efe	Merge "jpegdec: add a hint for EOF/READ errors" into main	2024-05-02 20:53:54 +00:00
James Zern	79e7968ad0	jpegdec: add a hint for EOF/READ errors `jpegtran -copy all` may be able to process the file into something libjpeg can read. Bug: webp:562 Change-Id: I27a8a7a841c1d8d54f47c7c52e289d9d8c549220	2024-05-01 12:32:47 -07:00
James Zern	d33455cd31	man/*: s/BUGS/REPORTING BUGS/ BUGS is described by the man-pages man page as: A list of limitations, known defects or inconveniences, and other questionable activities. Change-Id: I2781d48d7a01c173fd24ac0b60af1a84aec31c3b	2024-05-01 10:39:00 -07:00
James Zern	a67ff735a2	normalize example exit status Use EXIT_SUCCESS / EXIT_FAILURE in most cases as more granularity isn't useful. For anim_diff, use 0 (success), 1 (image difference) and 2 (error) to align it with other diff utilities (diff, etc.). Bug: webp:637 Change-Id: I52925de8622a5a4d2141883279d69a1d95ef9b12	2024-05-01 10:38:44 -07:00
James Zern	edc289092a	upsampling_{neon,sse41}: fix int sanitizer warning fixes warnings of the form: /src/dsp/upsampling_sse41.c:170:1: runtime error: implicit conversion from type 'int' of value -16 (32-bit, signed) to type 'uintptr_t' (aka 'unsigned long') changed the value to 18446744073709551600 (64-bit, unsigned) this is the same change as was done previously in upsampling_sse2.c: `2ee786c7` upsampling_sse2.c: clear int sanitizer warnings Change-Id: I36064d837ad1a7a118918c16a5551fc732dec2ff	2024-04-30 13:06:09 -07:00
James Zern	3cada4cef4	ImgIoUtilReadFile: check ftell() return This avoids attempting to allocate 0 bytes if the call fails. (An additional byte is added to the result to allow a '\0' to be appended.) Bug: chromium:334120888 Change-Id: Ifcb8ff7744c567cbd08051aa04cc66acf936078d	2024-04-19 12:50:01 -07:00
James Zern	dc9505855e	Merge tag 'v1.4.0' libwebp-1.4.0 - 4/12/2024: version 1.4.0 This is a binary compatible release. * API changes: - libwebpmux: WebPAnimEncoderSetChunk, WebPAnimEncoderGetChunk, WebPAnimEncoderDeleteChunk - libsharpyuv: SharpYuvOptionsInit, SharpYuvConvertWithOptions - extras: SharpYuvEstimate420Risk * further security related hardening in libwebp & examples * some minor optimizations in the lossless encoder * added WEBP_NODISCARD to report unused result warnings; enable with -DWEBP_ENABLE_NODISCARD=1 * improvements and corrections in webp-container-spec.txt and webp-lossless-bitstream-spec.txt (#611) * miscellaneous warning, bug & build fixes (#615, #619, #632, #635) Bug: webp:627 * tag 'v1.4.0': update ChangeLog update NEWS provide a way to opt-out/override WEBP_NODISCARD update ChangeLog NEWS: fix date vwebp: fix window title when options are given update NEWS bump version to 1.4.0 update AUTHORS Change-Id: Ib09dee63d968b50166c9177f571d68082e50ca76	2024-04-12 19:05:47 -07:00
James Zern	845d5476a8	update ChangeLog Bug: webp:627 Change-Id: I67a011b62087a8ad64df3b5eb0a1c051adeea60f	2024-04-12 13:48:48 -07:00
James Zern	8a6a55bba8	update NEWS add a note about WEBP_NODISCARD / WEBP_ENABLE_NODISCARD Bug: webp:627 Change-Id: I018848eaef33dd47b922d6c95fc2842248cc02df	2024-04-12 12:34:22 -07:00
James Zern	cf7c5a5de8	provide a way to opt-out/override WEBP_NODISCARD Bug: webp:627 Change-Id: I95ef89f37e5b7bf0f806bcfc6a295f5168b1bf67	2024-04-11 12:34:46 -07:00