apply clang-format

(Debian clang-format version 19.1.7 (3+build4)) with `--style=Google`.

Manual changes:
* clang-format disabled around macros with stringification (mostly assembly)
* some inline assembly strings were adjusted to avoid awkward line breaks
* trailing commas, `//` or suffixes (`ull`) added to help array formatting
* thread_utils.c: parameter comments were changed to the more common /*...=*/ style to improve formatting

The automatically generated code under swig/ was skipped.

Bug: 433996651
Change-Id: Iea3f24160d78d2a2653971cdf13fa932e47ff1b3
2025-12-24 14:06:27 +01:00 · 2025-07-28 18:23:12 -07:00
parent b569988d3f
commit 44257cb826
224 changed files with 16312 additions and 16734 deletions
--- a/examples/anim_diff.c
+++ b/examples/anim_diff.c
@@ -57,8 +57,8 @@ static WEBP_INLINE int PixelsAreSimilar(uint32_t src, uint32_t dst,
 }
 static int FramesAreSimilar(const uint8_t* const rgba1,
-                            const uint8_t* const rgba2,
+                            const uint8_t* const rgba2, int width, int height,
-                            int width, int height, int max_allowed_diff) {
+                            int max_allowed_diff) {
  int i, j;
  assert(max_allowed_diff > 0);
  for (j = 0; j < height; ++j) {
@@ -120,8 +120,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
    if (alpha1 == 0 && alpha2 == 0) return 1;
  }
  if (bg1 != bg2) {
-    fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n",
+    fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n", bg1, bg2);
            bg1, bg2);
    return 0;
  }
  return 1;
@@ -131,8 +130,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
 // is OK for other aspects like offsets, dispose/blend method to vary.
 static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                                    const AnimatedImage* const img2,
-                                    int premultiply,
+                                    int premultiply, double min_psnr) {
                                    double min_psnr) {
  int ok = 1;
  const int is_multi_frame_image = (img1->num_frames > 1);
  uint32_t i;
@@ -141,8 +139,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                      "Canvas width mismatch");
  ok &= CompareValues(img1->canvas_height, img2->canvas_height,
                      "Canvas height mismatch");
-  ok &= CompareValues(img1->num_frames, img2->num_frames,
+  ok &=
-                      "Frame count mismatch");
+      CompareValues(img1->num_frames, img2->num_frames, "Frame count mismatch");
  if (!ok) return 0;  // These are fatal failures, can't proceed.
  if (is_multi_frame_image) {  // Checks relevant for multi-frame images only.
@@ -178,8 +176,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                   premultiply, &max_diff, &psnr);
    if (min_psnr > 0.) {
      if (psnr < min_psnr) {
-        fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i,
+        fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i, psnr,
-                psnr, min_psnr);
+                min_psnr);
        ok = 0;
      }
    } else {
@@ -199,7 +197,8 @@ static void Help(void) {
  printf("  -min_psnr <float> ... minimum per-frame PSNR\n");
  printf("  -raw_comparison ..... if this flag is not used, RGB is\n");
  printf("                        premultiplied before comparison\n");
-  printf("  -max_diff <int> ..... maximum allowed difference per channel\n"
+  printf(
      "  -max_diff <int> ..... maximum allowed difference per channel\n"
      "                        between corresponding pixels in subsequent\n"
      "                        frames\n");
  printf("  -h .................. this help\n");
@@ -217,7 +216,7 @@ int main(int argc, const char* argv[]) {
  int premultiply = 1;
  int max_diff = 0;
  int i, c;
-  const char* files[2] = { NULL, NULL };
+  const char* files[2] = {NULL, NULL};
  AnimatedImage images[2];
  INIT_WARGV(argc, argv);
@@ -253,9 +252,8 @@ int main(int argc, const char* argv[]) {
      GetAnimatedImageVersions(&dec_version, &demux_version);
      printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
             (dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
-             (dec_version >> 0) & 0xff,
+             (dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
-             (demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
+             (demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
             (demux_version >> 0) & 0xff);
      FREE_WARGV_AND_RETURN(0);
    } else {
      if (!got_input1) {
@@ -278,7 +276,6 @@ int main(int argc, const char* argv[]) {
    FREE_WARGV_AND_RETURN(return_code);
  }
  if (!got_input2) {
    Help();
    FREE_WARGV_AND_RETURN(return_code);
@@ -301,8 +298,8 @@ int main(int argc, const char* argv[]) {
    }
  }
-  if (!CompareAnimatedImagePair(&images[0], &images[1],
+  if (!CompareAnimatedImagePair(&images[0], &images[1], premultiply,
-                                premultiply, min_psnr)) {
+                                min_psnr)) {
    WFPRINTF(stderr, "\nFiles %s and %s differ.\n", (const W_CHAR*)files[0],
             (const W_CHAR*)files[1]);
    return_code = 1;
@@ -311,7 +308,7 @@ int main(int argc, const char* argv[]) {
            (const W_CHAR*)files[1]);
    return_code = 0;
  }
- End:
+End:
  ClearAnimatedImage(&images[0]);
  ClearAnimatedImage(&images[1]);
  FREE_WARGV_AND_RETURN(return_code);
--- a/examples/anim_dump.c
+++ b/examples/anim_dump.c
@@ -29,7 +29,8 @@ static void Help(void) {
  printf("Usage: anim_dump [options] files...\n");
  printf("\nOptions:\n");
  printf("  -folder <string> .... dump folder (default: '.')\n");
-  printf("  -prefix <string> .... prefix for dumped frames "
+  printf(
      "  -prefix <string> .... prefix for dumped frames "
      "(default: 'dump_')\n");
  printf("  -tiff ............... save frames as TIFF\n");
  printf("  -pam ................ save frames as PAM\n");
@@ -82,17 +83,16 @@ int main(int argc, const char* argv[]) {
      GetAnimatedImageVersions(&dec_version, &demux_version);
      printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
             (dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
-             (dec_version >> 0) & 0xff,
+             (dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
-             (demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
+             (demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
             (demux_version >> 0) & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {
      uint32_t i;
      AnimatedImage image;
      const W_CHAR* const file = GET_WARGV(argv, c);
      memset(&image, 0, sizeof(image));
-      WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n",
+      WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n", file, dump_folder, prefix,
-              file, dump_folder, prefix, suffix);
+              suffix);
      if (!ReadAnimatedImage((const char*)file, &image, 0, NULL)) {
        WFPRINTF(stderr, "Error decoding file: %s\n Aborting.\n", file);
        error = 1;
@@ -113,8 +113,8 @@ int main(int argc, const char* argv[]) {
        buffer.u.RGBA.rgba = image.frames[i].rgba;
        buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
        buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
-        WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s",
+        WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s", dump_folder,
-                  dump_folder, prefix, i, suffix);
+                  prefix, i, suffix);
        if (!WebPSaveImage(&buffer, format, (const char*)out_file)) {
          WFPRINTF(stderr, "Error while saving image '%s'\n", out_file);
          error = 1;
--- a/examples/anim_util.c
+++ b/examples/anim_util.c
@@ -41,8 +41,8 @@ static const int kNumChannels = 4;
 #if defined(WEBP_HAVE_GIF)
 // Returns true if the frame covers the full canvas.
-static int IsFullFrame(int width, int height,
+static int IsFullFrame(int width, int height, int canvas_width,
-                       int canvas_width, int canvas_height) {
+                       int canvas_height) {
  return (width == canvas_width && height == canvas_height);
 }
 #endif  // WEBP_HAVE_GIF
@@ -95,8 +95,8 @@ void ClearAnimatedImage(AnimatedImage* const image) {
 #if defined(WEBP_HAVE_GIF)
 // Clear the canvas to transparent.
-static void ZeroFillCanvas(uint8_t* rgba,
+static void ZeroFillCanvas(uint8_t* rgba, uint32_t canvas_width,
-                           uint32_t canvas_width, uint32_t canvas_height) {
+                           uint32_t canvas_height) {
  memset(rgba, 0, canvas_width * kNumChannels * canvas_height);
 }
@@ -113,16 +113,16 @@ static void ZeroFillFrameRect(uint8_t* rgba, int rgba_stride, int x_offset,
 }
 // Copy width * height pixels from 'src' to 'dst'.
-static void CopyCanvas(const uint8_t* src, uint8_t* dst,
+static void CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width,
-                       uint32_t width, uint32_t height) {
+                       uint32_t height) {
  assert(src != NULL && dst != NULL);
  memcpy(dst, src, width * kNumChannels * height);
 }
 // Copy pixels in the given rectangle from 'src' to 'dst' honoring the 'stride'.
 static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
-                               int x_offset, int y_offset,
+                               int x_offset, int y_offset, int width,
-                               int width, int height) {
+                               int height) {
  int j;
  const int width_in_bytes = width * kNumChannels;
  const size_t offset = y_offset * stride + x_offset * kNumChannels;
@@ -138,8 +138,8 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
 #endif  // WEBP_HAVE_GIF
 // Canonicalize all transparent pixels to transparent black to aid comparison.
-static void CleanupTransparentPixels(uint32_t* rgba,
+static void CleanupTransparentPixels(uint32_t* rgba, uint32_t width,
-                                     uint32_t width, uint32_t height) {
+                                     uint32_t height) {
  const uint32_t* const rgba_end = rgba + width * height;
  while (rgba < rgba_end) {
    const uint8_t alpha = (*rgba >> 24) & 0xff;
@@ -152,8 +152,8 @@ static void CleanupTransparentPixels(uint32_t* rgba,
 // Dump frame to a PAM file. Returns true on success.
 static int DumpFrame(const char filename[], const char dump_folder[],
-                     uint32_t frame_num, const uint8_t rgba[],
+                     uint32_t frame_num, const uint8_t rgba[], int canvas_width,
-                     int canvas_width, int canvas_height) {
+                     int canvas_height) {
  int ok = 0;
  size_t max_len;
  int y;
@@ -166,8 +166,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
  base_name = WSTRRCHR(filename, '/');
  base_name = (base_name == NULL) ? (const W_CHAR*)filename : base_name + 1;
-  max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name)
+  max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name) + strlen("_frame_") +
-          + strlen("_frame_") + strlen(".pam") + 8;
+            strlen(".pam") + 8;
  file_name = (W_CHAR*)WebPMalloc(max_len * sizeof(*file_name));
  if (file_name == NULL) goto End;
@@ -183,7 +183,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
    ok = 0;
    goto End;
  }
-  if (fprintf(f, "P7\nWIDTH %d\nHEIGHT %d\n"
+  if (fprintf(f,
              "P7\nWIDTH %d\nHEIGHT %d\n"
              "DEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
              canvas_width, canvas_height) < 0) {
    WFPRINTF(stderr, "Write error for file %s\n", file_name);
@@ -198,7 +199,7 @@ static int DumpFrame(const char filename[], const char dump_folder[],
    row += canvas_width * kNumChannels;
  }
  ok = 1;
- End:
+End:
  if (f != NULL) fclose(f);
  WebPFree(file_name);
  return ok;
@@ -266,8 +267,8 @@ static int ReadAnimatedWebP(const char filename[],
           image->canvas_width * kNumChannels * image->canvas_height);
    // Needed only because we may want to compare with GIF later.
-    CleanupTransparentPixels((uint32_t*)curr_rgba,
+    CleanupTransparentPixels((uint32_t*)curr_rgba, image->canvas_width,
-                             image->canvas_width, image->canvas_height);
+                             image->canvas_height);
    if (dump_frames && dump_ok) {
      dump_ok = DumpFrame(filename, dump_folder, frame_index, curr_rgba,
@@ -283,7 +284,7 @@ static int ReadAnimatedWebP(const char filename[],
  ok = dump_ok;
  if (ok) image->format = ANIM_WEBP;
- End:
+End:
  WebPAnimDecoderDelete(dec);
  return ok;
 }
@@ -303,12 +304,11 @@ static int IsGIF(const WebPData* const data) {
 // GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
 #if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
-# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
-# define LOCAL_GIF_PREREQ(maj, min) \
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
    (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
 #else
-# define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_VERSION 0
-# define LOCAL_GIF_PREREQ(maj, min) 0
+#define LOCAL_GIF_PREREQ(maj, min) 0
 #endif
 #if !LOCAL_GIF_PREREQ(5, 0)
@@ -357,8 +357,8 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
  for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
    ExtensionBlock* ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
    if (ep->Function == GRAPHICS_EXT_FUNC_CODE) {
-      return DGifExtensionToGCB(
+      return DGifExtensionToGCB(ep->ByteCount, (const GifByteType*)ep->Bytes,
-          ep->ByteCount, (const GifByteType*)ep->Bytes, gcb);
+                                gcb);
    }
  }
  return GIF_ERROR;
@@ -377,12 +377,12 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
 #endif
 static int IsKeyFrameGIF(const GifImageDesc* prev_desc, int prev_dispose,
-                         const DecodedFrame* const prev_frame,
+                         const DecodedFrame* const prev_frame, int canvas_width,
-                         int canvas_width, int canvas_height) {
+                         int canvas_height) {
  if (prev_frame == NULL) return 1;
  if (prev_dispose == DISPOSE_BACKGROUND) {
-    if (IsFullFrame(prev_desc->Width, prev_desc->Height,
+    if (IsFullFrame(prev_desc->Width, prev_desc->Height, canvas_width,
-                    canvas_width, canvas_height)) {
+                    canvas_height)) {
      return 1;
    }
    if (prev_frame->is_key_frame) return 1;
@@ -403,14 +403,12 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
  if (transparent_index != NO_TRANSPARENT_COLOR &&
      gif->SBackGroundColor == transparent_index) {
    return 0x00000000;  // Special case: transparent black.
-  } else if (color_map == NULL || color_map->Colors == NULL
+  } else if (color_map == NULL || color_map->Colors == NULL ||
-             || gif->SBackGroundColor >= color_map->ColorCount) {
+             gif->SBackGroundColor >= color_map->ColorCount) {
    return 0xffffffff;  // Invalid: assume white.
  } else {
    const GifColorType color = color_map->Colors[gif->SBackGroundColor];
-    return (0xffu << 24) |
+    return (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
           (color.Red << 16) |
           (color.Green << 8) |
           (color.Blue << 0);
  }
 }
@@ -435,11 +433,10 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
          (eb1->ByteCount == 11) &&
          (!memcmp(signature, "NETSCAPE2.0", 11) ||
           !memcmp(signature, "ANIMEXTS1.0", 11));
-      if (signature_is_ok &&
+      if (signature_is_ok && eb2->Function == CONTINUE_EXT_FUNC_CODE &&
-          eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
+          eb2->ByteCount >= 3 && eb2->Bytes[0] == 1) {
-          eb2->Bytes[0] == 1) {
+        const uint32_t extra_loop =
-        const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
+            ((uint32_t)(eb2->Bytes[2]) << 8) + ((uint32_t)(eb2->Bytes[1]) << 0);
                                    ((uint32_t)(eb2->Bytes[1]) << 0);
        return (extra_loop > 0) ? extra_loop + 1 : 0;
      }
    }
@@ -535,8 +532,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
  image->canvas_height = (uint32_t)gif->SHeight;
  if (image->canvas_width > MAX_CANVAS_SIZE ||
      image->canvas_height > MAX_CANVAS_SIZE) {
-    fprintf(stderr, "Invalid canvas dimension: %d x %d\n",
+    fprintf(stderr, "Invalid canvas dimension: %d x %d\n", image->canvas_width,
-            image->canvas_width, image->canvas_height);
+            image->canvas_height);
    DGifCloseFile(gif, NULL);
    return 0;
  }
@@ -611,11 +608,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
        CopyCanvas(prev_rgba, curr_rgba, canvas_width, canvas_height);
        // Dispose previous frame rectangle.
-        prev_frame_disposed =
+        prev_frame_disposed = (prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
            (prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
                               prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
-        curr_frame_opaque =
+        curr_frame_opaque = (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
            (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
        prev_frame_completely_covered =
            curr_frame_opaque &&
            CoversFrameGIF(&curr_gif_image->ImageDesc, prev_desc);
@@ -643,9 +638,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
                uint8_t* const src_frame_rgba =
                    image->frames[src_frame_num].rgba;
                CopyFrameRectangle(src_frame_rgba, curr_rgba,
-                                   canvas_width_in_bytes,
+                                   canvas_width_in_bytes, prev_desc->Left,
-                                   prev_desc->Left, prev_desc->Top,
+                                   prev_desc->Top, prev_desc->Width,
-                                   prev_desc->Width, prev_desc->Height);
+                                   prev_desc->Height);
              } else {
                // Source canvas doesn't exist. So clear previous frame
                // rectangle to background.
@@ -670,8 +665,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
    }
    if (dump_frames) {
-      if (!DumpFrame(filename, dump_folder, i, curr_rgba,
+      if (!DumpFrame(filename, dump_folder, i, curr_rgba, canvas_width,
-                     canvas_width, canvas_height)) {
+                     canvas_height)) {
        DGifCloseFile(gif, NULL);
        return 0;
      }
@@ -695,7 +690,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
  (void)image;
  (void)dump_frames;
  (void)dump_folder;
-  fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
+  fprintf(stderr,
          "GIF support not compiled. Please install the libgif-dev "
          "package before building.\n");
  return 0;
 }
@@ -718,8 +714,8 @@ int ReadAnimatedImage(const char filename[], AnimatedImage* const image,
  }
  if (IsWebP(&webp_data)) {
-    ok = ReadAnimatedWebP(filename, &webp_data, image, dump_frames,
+    ok =
-                          dump_folder);
+        ReadAnimatedWebP(filename, &webp_data, image, dump_frames, dump_folder);
  } else if (IsGIF(&webp_data)) {
    ok = ReadAnimatedGIF(filename, image, dump_frames, dump_folder);
  } else {
@@ -763,8 +759,7 @@ void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
        // premultiply R/G/B channels with alpha value
        for (k = 0; k < kAlphaChannel; ++k) {
          Accumulate(rgba1[offset + k] * alpha1 / 255.,
-                     rgba2[offset + k] * alpha2 / 255.,
+                     rgba2[offset + k] * alpha2 / 255., &f_max_diff, &sse);
                     &f_max_diff, &sse);
        }
      }
    }
--- a/examples/anim_util.h
+++ b/examples/anim_util.h
@@ -22,10 +22,7 @@
 extern "C" {
 #endif
-typedef enum {
+typedef enum { ANIM_GIF, ANIM_WEBP } AnimatedFileFormat;
  ANIM_GIF,
  ANIM_WEBP
 } AnimatedFileFormat;
 typedef struct {
  uint8_t* rgba;     // Decoded and reconstructed full frame.
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -66,12 +66,12 @@ static int ReadYUV(const uint8_t* const data, size_t data_size,
  pic->use_argb = 0;
  if (!WebPPictureAlloc(pic)) return 0;
-  ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride,
+  ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride, pic->width,
-                     pic->width, pic->height);
+                     pic->height);
-  ImgIoUtilCopyPlane(data + y_plane_size, uv_width,
+  ImgIoUtilCopyPlane(data + y_plane_size, uv_width, pic->u, pic->uv_stride,
-                     pic->u, pic->uv_stride, uv_width, uv_height);
+                     uv_width, uv_height);
-  ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width,
+  ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width, pic->v,
-                     pic->v, pic->uv_stride, uv_width, uv_height);
+                     pic->uv_stride, uv_width, uv_height);
  return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
 }
@@ -119,7 +119,7 @@ static int ReadPicture(const char* const filename, WebPPicture* const pic,
    // If image size is specified, infer it as YUV format.
    ok = ReadYUV(data, data_size, pic);
  }
- End:
+End:
  if (!ok) {
    WFPRINTF(stderr, "Error! Could not process file %s\n",
             (const W_CHAR*)filename);
@@ -168,8 +168,8 @@ static void PrintValues(const int values[4]) {
 static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
                                  const char* const description) {
-  fprintf(stderr, "Lossless-%s compressed size: %d bytes\n",
+  fprintf(stderr, "Lossless-%s compressed size: %d bytes\n", description,
-          description, stats->lossless_size);
+          stats->lossless_size);
  fprintf(stderr, "  * Header size: %d bytes, image data size: %d\n",
          stats->lossless_hdr_size, stats->lossless_data_size);
  if (stats->lossless_features) {
@@ -209,8 +209,7 @@ static void PrintExtraInfoLossless(const WebPPicture* const pic,
 }
 static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
-                                int full_details,
+                                int full_details, const char* const file_name) {
                                const char* const file_name) {
  const WebPAuxStats* const stats = pic->stats;
  if (short_output) {
    fprintf(stderr, "%7d %2.2f\n", stats->coded_size, stats->PSNR[3]);
@@ -220,24 +219,24 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
    const int num_skip = stats->block_count[2];
    const int total = num_i4 + num_i16;
    WFPRINTF(stderr, "File:      %s\n", (const W_CHAR*)file_name);
-    fprintf(stderr, "Dimension: %d x %d%s\n",
+    fprintf(stderr, "Dimension: %d x %d%s\n", pic->width, pic->height,
            pic->width, pic->height,
            stats->alpha_data_size ? " (with alpha)" : "");
-    fprintf(stderr, "Output:    "
+    fprintf(stderr,
            "Output:    "
            "%d bytes Y-U-V-All-PSNR %2.2f %2.2f %2.2f   %2.2f dB\n"
            "           (%.2f bpp)\n",
-            stats->coded_size,
+            stats->coded_size, stats->PSNR[0], stats->PSNR[1], stats->PSNR[2],
-            stats->PSNR[0], stats->PSNR[1], stats->PSNR[2], stats->PSNR[3],
+            stats->PSNR[3], 8.f * stats->coded_size / pic->width / pic->height);
            8.f * stats->coded_size / pic->width / pic->height);
    if (total > 0) {
-      int totals[4] = { 0, 0, 0, 0 };
+      int totals[4] = {0, 0, 0, 0};
-      fprintf(stderr, "block count:  intra4:     %6d  (%.2f%%)\n"
+      fprintf(stderr,
              "block count:  intra4:     %6d  (%.2f%%)\n"
              "              intra16:    %6d  (%.2f%%)\n"
              "              skipped:    %6d  (%.2f%%)\n",
-              num_i4, 100.f * num_i4 / total,
+              num_i4, 100.f * num_i4 / total, num_i16, 100.f * num_i16 / total,
              num_i16, 100.f * num_i16 / total,
              num_skip, 100.f * num_skip / total);
-      fprintf(stderr, "bytes used:  header:         %6d  (%.1f%%)\n"
+      fprintf(stderr,
              "bytes used:  header:         %6d  (%.1f%%)\n"
              "             mode-partition: %6d  (%.1f%%)\n",
              stats->header_bytes[0],
              100.f * stats->header_bytes[0] / stats->coded_size,
@@ -247,7 +246,8 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
        fprintf(stderr, "             transparency:   %6d (%.1f dB)\n",
                stats->alpha_data_size, stats->PSNR[4]);
      }
-      fprintf(stderr, " Residuals bytes  "
+      fprintf(stderr,
              " Residuals bytes  "
              "|segment 1|segment 2|segment 3"
              "|segment 4|  total\n");
      if (full_details) {
@@ -346,7 +346,7 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
  }
  ok = 1;
- Error:
+Error:
  fclose(f);
  return ok;
 }
@@ -411,9 +411,9 @@ static int WriteMetadataChunk(FILE* const out, const char fourcc[4],
 // Sets 'flag' in 'vp8x_flags' and updates 'metadata_size' with the size of the
 // chunk if there is metadata and 'keep' is true.
-static int UpdateFlagsAndSize(const MetadataPayload* const payload,
+static int UpdateFlagsAndSize(const MetadataPayload* const payload, int keep,
-                              int keep, int flag,
+                              int flag, uint32_t* vp8x_flags,
-                              uint32_t* vp8x_flags, uint64_t* metadata_size) {
+                              uint64_t* metadata_size) {
  if (keep && payload->bytes != NULL && payload->size > 0) {
    *vp8x_flags |= flag;
    *metadata_size += kChunkHeaderSize + payload->size + (payload->size & 1);
@@ -442,14 +442,14 @@ static int WriteWebPWithMetadata(FILE* const out,
  const size_t kMinSize = kRiffHeaderSize + kChunkHeaderSize;
  uint32_t flags = 0;
  uint64_t metadata_size = 0;
-  const int write_exif = UpdateFlagsAndSize(&metadata->exif,
+  const int write_exif =
-                                            !!(keep_metadata & METADATA_EXIF),
+      UpdateFlagsAndSize(&metadata->exif, !!(keep_metadata & METADATA_EXIF),
                         kEXIFFlag, &flags, &metadata_size);
-  const int write_iccp = UpdateFlagsAndSize(&metadata->iccp,
+  const int write_iccp =
-                                            !!(keep_metadata & METADATA_ICC),
+      UpdateFlagsAndSize(&metadata->iccp, !!(keep_metadata & METADATA_ICC),
                         kICCPFlag, &flags, &metadata_size);
-  const int write_xmp  = UpdateFlagsAndSize(&metadata->xmp,
+  const int write_xmp =
-                                            !!(keep_metadata & METADATA_XMP),
+      UpdateFlagsAndSize(&metadata->xmp, !!(keep_metadata & METADATA_XMP),
                         kXMPFlag, &flags, &metadata_size);
  uint8_t* webp = memory_writer->mem;
  size_t webp_size = memory_writer->size;
@@ -458,7 +458,8 @@ static int WriteWebPWithMetadata(FILE* const out,
  if (webp_size < kMinSize) return 0;
  if (webp_size - kChunkHeaderSize + metadata_size > kMaxChunkPayload) {
-    fprintf(stderr, "Error! Addition of metadata would exceed "
+    fprintf(stderr,
            "Error! Addition of metadata would exceed "
            "container size limit.\n");
    return 0;
  }
@@ -466,9 +467,9 @@ static int WriteWebPWithMetadata(FILE* const out,
  if (metadata_size > 0) {
    const int kVP8XChunkSize = 18;
    const int has_vp8x = !memcmp(webp + kRiffHeaderSize, "VP8X", kTagSize);
-    const uint32_t riff_size = (uint32_t)(webp_size - kChunkHeaderSize +
+    const uint32_t riff_size =
-                                          (has_vp8x ? 0 : kVP8XChunkSize) +
+        (uint32_t)(webp_size - kChunkHeaderSize +
-                                          metadata_size);
+                   (has_vp8x ? 0 : kVP8XChunkSize) + metadata_size);
    // RIFF
    int ok = (fwrite(webp, kTagSize, 1, out) == 1);
    // RIFF size (file header size is not recorded)
@@ -527,8 +528,7 @@ enum {
  RESIZE_MODE_DEFAULT = RESIZE_MODE_ALWAYS
 };
-static void ApplyResizeMode(const int resize_mode,
+static void ApplyResizeMode(const int resize_mode, const WebPPicture* const pic,
                            const WebPPicture* const pic,
                            int* const resize_w, int* const resize_h) {
  const int src_w = pic->width;
  const int src_h = pic->height;
@@ -536,8 +536,7 @@ static void ApplyResizeMode(const int resize_mode,
  const int dst_h = *resize_h;
  if (resize_mode == RESIZE_MODE_DOWN_ONLY) {
-    if ((dst_w == 0 && src_h <= dst_h) ||
+    if ((dst_w == 0 && src_h <= dst_h) || (dst_h == 0 && src_w <= dst_w) ||
        (dst_h == 0 && src_w <= dst_w) ||
        (src_w <= dst_w && src_h <= dst_h)) {
      *resize_w = *resize_h = 0;
    }
@@ -551,8 +550,7 @@ static void ApplyResizeMode(const int resize_mode,
 //------------------------------------------------------------------------------
 static int ProgressReport(int percent, const WebPPicture* const picture) {
-  fprintf(stderr, "[%s]: %3d %%      \r",
+  fprintf(stderr, "[%s]: %3d %%      \r", (char*)picture->user_data, percent);
          (char*)picture->user_data, percent);
  return 1;  // all ok
 }
@@ -569,7 +567,8 @@ static void HelpShort(void) {
 static void HelpLong(void) {
  printf("Usage:\n");
  printf(" cwebp [-preset <...>] [options] in_file [-o out_file]\n\n");
-  printf("If input size (-s) for an image is not specified, it is\n"
+  printf(
      "If input size (-s) for an image is not specified, it is\n"
      "assumed to be a PNG, JPEG, TIFF or WebP file.\n");
  printf("Note: Animated PNG and WebP files are not supported.\n");
 #ifdef HAVE_WINCODEC_H
@@ -578,45 +577,58 @@ static void HelpLong(void) {
  printf("\nOptions:\n");
  printf("  -h / -help ............. short help\n");
  printf("  -H / -longhelp ......... long help\n");
-  printf("  -q <float> ............. quality factor (0:small..100:big), "
+  printf(
      "  -q <float> ............. quality factor (0:small..100:big), "
      "default=75\n");
-  printf("  -alpha_q <int> ......... transparency-compression quality (0..100),"
+  printf(
      "  -alpha_q <int> ......... transparency-compression quality (0..100),"
      "\n                           default=100\n");
  printf("  -preset <string> ....... preset setting, one of:\n");
  printf("                            default, photo, picture,\n");
  printf("                            drawing, icon, text\n");
  printf("     -preset must come first, as it overwrites other parameters\n");
-  printf("  -z <int> ............... activates lossless preset with given\n"
+  printf(
      "  -z <int> ............... activates lossless preset with given\n"
      "                           level in [0:fast, ..., 9:slowest]\n");
  printf("\n");
-  printf("  -m <int> ............... compression method (0=fast, 6=slowest), "
+  printf(
      "  -m <int> ............... compression method (0=fast, 6=slowest), "
      "default=4\n");
-  printf("  -segments <int> ........ number of segments to use (1..4), "
+  printf(
      "  -segments <int> ........ number of segments to use (1..4), "
      "default=4\n");
  printf("  -size <int> ............ target size (in bytes)\n");
  printf("  -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
  printf("\n");
  printf("  -s <int> <int> ......... input size (width x height) for YUV\n");
-  printf("  -sns <int> ............. spatial noise shaping (0:off, 100:max), "
+  printf(
      "  -sns <int> ............. spatial noise shaping (0:off, 100:max), "
      "default=50\n");
-  printf("  -f <int> ............... filter strength (0=off..100), "
+  printf(
      "  -f <int> ............... filter strength (0=off..100), "
      "default=60\n");
-  printf("  -sharpness <int> ....... "
+  printf(
      "  -sharpness <int> ....... "
      "filter sharpness (0:most .. 7:least sharp), default=0\n");
-  printf("  -strong ................ use strong filter instead "
+  printf(
      "  -strong ................ use strong filter instead "
      "of simple (default)\n");
  printf("  -nostrong .............. use simple filter instead of strong\n");
-  printf("  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
+  printf(
      "  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
      "conversion\n");
  printf("  -partition_limit <int> . limit quality to fit the 512k limit on\n");
-  printf("                           "
+  printf(
      "                           "
      "the first partition (0=no degradation ... 100=full)\n");
  printf("  -pass <int> ............ analysis pass number (1..10)\n");
-  printf("  -qrange <min> <max> .... specifies the permissible quality range\n"
+  printf(
      "  -qrange <min> <max> .... specifies the permissible quality range\n"
      "                           (default: 0 100)\n");
  printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
  printf("  -resize <w> <h> ........ resize picture (*after* any cropping)\n");
-  printf("  -resize_mode <string> .. one of: up_only, down_only,"
+  printf(
      "  -resize_mode <string> .. one of: up_only, down_only,"
      " always (default)\n");
  printf("  -mt .................... use multi-threading if available\n");
  printf("  -low_memory ............ reduce memory usage (slower encoding)\n");
@@ -625,19 +637,23 @@ static void HelpLong(void) {
  printf("  -print_ssim ............ prints averaged SSIM distortion\n");
  printf("  -print_lsim ............ prints local-similarity distortion\n");
  printf("  -d <file.pgm> .......... dump the compressed output (PGM file)\n");
-  printf("  -alpha_method <int> .... transparency-compression method (0..1), "
+  printf(
      "  -alpha_method <int> .... transparency-compression method (0..1), "
      "default=1\n");
  printf("  -alpha_filter <string> . predictive filtering for alpha plane,\n");
  printf("                           one of: none, fast (default) or best\n");
-  printf("  -exact ................. preserve RGB values in transparent area, "
+  printf(
      "  -exact ................. preserve RGB values in transparent area, "
      "default=off\n");
-  printf("  -blend_alpha <hex> ..... blend colors against background color\n"
+  printf(
      "  -blend_alpha <hex> ..... blend colors against background color\n"
      "                           expressed as RGB values written in\n"
      "                           hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
      "                           green=0xe0 and blue=0xd0\n");
  printf("  -noalpha ............... discard any transparency information\n");
  printf("  -lossless .............. encode image losslessly, default=off\n");
-  printf("  -near_lossless <int> ... use near-lossless image preprocessing\n"
+  printf(
      "  -near_lossless <int> ... use near-lossless image preprocessing\n"
      "                           (0..100=off), default=100\n");
  printf("  -hint <string> ......... specify image characteristics hint,\n");
  printf("                           one of: photo, picture or graph\n");
@@ -646,7 +662,8 @@ static void HelpLong(void) {
  printf("  -metadata <string> ..... comma separated list of metadata to\n");
  printf("                           ");
  printf("copy from the input to the output if present.\n");
-  printf("                           "
+  printf(
      "                           "
      "Valid values: all, none (default), exif, icc, xmp\n");
  printf("\n");
@@ -656,7 +673,8 @@ static void HelpLong(void) {
 #ifndef WEBP_DLL
  printf("  -noasm ................. disable all assembly optimizations\n");
 #endif
-  printf("  -v ..................... verbose, e.g. print encoding/decoding "
+  printf(
      "  -v ..................... verbose, e.g. print encoding/decoding "
      "times\n");
  printf("  -progress .............. report encoding progress\n");
  printf("\n");
@@ -687,15 +705,14 @@ static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {
    "PARTITION_OVERFLOW: Partition is too big to fit 16M",
    "BAD_WRITE: Picture writer returned an I/O error",
    "FILE_TOO_BIG: File would be too big to fit in 4G",
-  "USER_ABORT: encoding abort requested by user"
+    "USER_ABORT: encoding abort requested by user"};
 };
 //------------------------------------------------------------------------------
 // Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int return_value = EXIT_FAILURE;
-  const char* in_file = NULL, *out_file = NULL, *dump_file = NULL;
+  const char *in_file = NULL, *out_file = NULL, *dump_file = NULL;
  FILE* out = NULL;
  int c;
  int short_output = 0;
@@ -725,8 +742,7 @@ int main(int argc, const char* argv[]) {
  MetadataInit(&metadata);
  WebPMemoryWriterInit(&memory_writer);
-  if (!WebPPictureInit(&picture) ||
+  if (!WebPPictureInit(&picture) || !WebPPictureInit(&original_picture) ||
      !WebPPictureInit(&original_picture) ||
      !WebPConfigInit(&config)) {
    fprintf(stderr, "Error! Version mismatch!\n");
    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
@@ -766,8 +782,7 @@ int main(int argc, const char* argv[]) {
      picture.height = ExUtilGetInt(argv[++c], 0, &parse_error);
      if (picture.width > WEBP_MAX_DIMENSION || picture.width < 0 ||
          picture.height > WEBP_MAX_DIMENSION || picture.height < 0) {
-        fprintf(stderr,
+        fprintf(stderr, "Specified dimension (%d x %d) is out of range.\n",
                "Specified dimension (%d x %d) is out of range.\n",
                picture.width, picture.height);
        goto Error;
      }
@@ -892,11 +907,10 @@ int main(int argc, const char* argv[]) {
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetEncoderVersion();
      const int sharpyuv_version = SharpYuvGetVersion();
-      printf("%d.%d.%d\n",
+      printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+             version & 0xff);
-      printf("libsharpyuv: %d.%d.%d\n",
+      printf("libsharpyuv: %d.%d.%d\n", (sharpyuv_version >> 24) & 0xff,
-             (sharpyuv_version >> 24) & 0xff, (sharpyuv_version >> 16) & 0xffff,
+             (sharpyuv_version >> 16) & 0xffff, sharpyuv_version & 0xff);
             sharpyuv_version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-progress")) {
      show_progress = 1;
@@ -930,11 +944,11 @@ int main(int argc, const char* argv[]) {
        const char* option;
        int flag;
      } kTokens[] = {
-        { "all",  METADATA_ALL },
+          {"all", METADATA_ALL},    //
-        { "none", 0 },
+          {"none", 0},              //
-        { "exif", METADATA_EXIF },
+          {"exif", METADATA_EXIF},  //
-        { "icc",  METADATA_ICC },
+          {"icc", METADATA_ICC},    //
-        { "xmp",  METADATA_XMP },
+          {"xmp", METADATA_XMP},    //
      };
      const size_t kNumTokens = sizeof(kTokens) / sizeof(kTokens[0]);
      const char* start = argv[++c];
@@ -966,7 +980,8 @@ int main(int argc, const char* argv[]) {
 #ifdef HAVE_WINCODEC_H
      if (keep_metadata != 0 && keep_metadata != METADATA_ICC) {
        // TODO(jzern): remove when -metadata is supported on all platforms.
-        fprintf(stderr, "Warning: only ICC profile extraction is currently"
+        fprintf(stderr,
                "Warning: only ICC profile extraction is currently"
                " supported on this platform!\n");
      }
 #endif
@@ -1005,11 +1020,13 @@ int main(int argc, const char* argv[]) {
  // warning for such options.
  if (!quiet && config.lossless == 1) {
    if (config.target_size > 0 || config.target_PSNR > 0) {
-      fprintf(stderr, "Encoding for specified size or PSNR is not supported"
+      fprintf(stderr,
              "Encoding for specified size or PSNR is not supported"
              " for lossless encoding. Ignoring such option(s)!\n");
    }
    if (config.partition_limit > 0) {
-      fprintf(stderr, "Partition limit option is not required for lossless"
+      fprintf(stderr,
              "Partition limit option is not required for lossless"
              " encoding. Ignoring this option!\n");
    }
  }
@@ -1027,8 +1044,8 @@ int main(int argc, const char* argv[]) {
  // Read the input. We need to decide if we prefer ARGB or YUVA
  // samples, depending on the expected compression mode (this saves
  // some conversion steps).
-  picture.use_argb = (config.lossless || config.use_sharp_yuv ||
+  picture.use_argb =
-                      config.preprocessing > 0 ||
+      (config.lossless || config.use_sharp_yuv || config.preprocessing > 0 ||
       crop || (resize_w | resize_h) > 0);
  if (verbose) {
    StopwatchReset(&stop_watch);
@@ -1177,8 +1194,8 @@ int main(int argc, const char* argv[]) {
  }
  if (!WebPEncode(&config, &picture)) {
    fprintf(stderr, "Error! Cannot encode picture as WebP\n");
-    fprintf(stderr, "Error code: %d (%s)\n",
+    fprintf(stderr, "Error code: %d (%s)\n", picture.error_code,
-            picture.error_code, kErrorMessages[picture.error_code]);
+            kErrorMessages[picture.error_code]);
    goto Error;
  }
  if (verbose) {
@@ -1221,7 +1238,8 @@ int main(int argc, const char* argv[]) {
  // Write the YUV planes to a PGM file. Only available for lossy.
  if (dump_file) {
    if (picture.use_argb) {
-      fprintf(stderr, "Warning: can't dump file (-d option) "
+      fprintf(stderr,
              "Warning: can't dump file (-d option) "
              "in lossless mode.\n");
    } else if (!DumpPicture(&picture, dump_file)) {
      WFPRINTF(stderr, "Warning, couldn't dump picture %s\n",
@@ -1268,17 +1286,17 @@ int main(int argc, const char* argv[]) {
      PrintMapInfo(&picture);
    }
    if (print_distortion >= 0) {  // print distortion
-      static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" };
+      static const char* distortion_names[] = {"PSNR", "SSIM", "LSIM"};
      float values[5];
-      if (!WebPPictureDistortion(&picture, &original_picture,
+      if (!WebPPictureDistortion(&picture, &original_picture, print_distortion,
-                                 print_distortion, values)) {
+                                 values)) {
        fprintf(stderr, "Error while computing the distortion.\n");
        goto Error;
      }
      if (!short_output) {
        fprintf(stderr, "%s: ", distortion_names[print_distortion]);
-        fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f  Total:%.2f\n",
+        fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f  Total:%.2f\n", values[0],
-                values[0], values[1], values[2], values[3], values[4]);
+                values[1], values[2], values[3], values[4]);
      } else {
        fprintf(stderr, "%7d %.4f\n", picture.stats->coded_size, values[4]);
      }
@@ -1289,7 +1307,7 @@ int main(int argc, const char* argv[]) {
  }
  return_value = EXIT_SUCCESS;
- Error:
+Error:
  WebPMemoryWriterClear(&memory_writer);
  WebPFree(picture.extra_info);
  MetadataFree(&metadata);
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -42,7 +42,6 @@ extern void* VP8GetCPUInfo;   // opaque forward declaration.
 #endif
 #endif  // WEBP_DLL
 static int SaveOutput(const WebPDecBuffer* const buffer,
                      WebPOutputFileFormat format, const char* const out_file) {
  const int use_stdout = (out_file != NULL) && !WSTRCMP(out_file, "-");
@@ -77,7 +76,8 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
 }
 static void Help(void) {
-  printf("Usage: dwebp in_file [options] [-o out_file]\n\n"
+  printf(
      "Usage: dwebp in_file [options] [-o out_file]\n\n"
      "Decodes the WebP image file to PNG format [Default].\n"
      "Note: Animated WebP files are not supported.\n\n"
      "Use following options to convert into alternate image formats:\n"
@@ -111,9 +111,7 @@ static void Help(void) {
  );
 }
-static const char* const kFormatType[] = {
+static const char* const kFormatType[] = {"unspecified", "lossy", "lossless"};
  "unspecified", "lossy", "lossless"
 };
 static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
                                       WebPOutputFileFormat format,
@@ -130,9 +128,9 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
    h = config->options.crop_height;
  }
  if (format >= RGB && format <= rgbA_4444) {
-    const int bpp = (format == RGB || format == BGR) ? 3
+    const int bpp =
-                  : (format == RGBA_4444 || format == rgbA_4444 ||
+        (format == RGB || format == BGR)                                    ? 3
-                     format == RGB_565) ? 2
+        : (format == RGBA_4444 || format == rgbA_4444 || format == RGB_565) ? 2
                                                                            : 4;
    uint32_t stride = bpp * w + 7;  // <- just for exercising
    external_buffer = (uint8_t*)WebPMalloc(stride * h);
@@ -145,8 +143,8 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
    uint8_t* tmp;
    uint32_t stride = w + 3;
    uint32_t uv_stride = (w + 1) / 2 + 13;
-    uint32_t total_size = stride * h * (has_alpha ? 2 : 1)
+    uint32_t total_size =
-                        + 2 * uv_stride * (h + 1) / 2;
+        stride * h * (has_alpha ? 2 : 1) + 2 * uv_stride * (h + 1) / 2;
    assert(format >= YUV && format <= YUVA);
    external_buffer = (uint8_t*)WebPMalloc(total_size);
    if (external_buffer == NULL) return NULL;
@@ -228,8 +226,8 @@ int main(int argc, const char* argv[]) {
      quiet = 1;
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetDecoderVersion();
-      printf("%d.%d.%d\n",
+      printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+             version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-pgm")) {
      format = PGM;
@@ -237,19 +235,32 @@ int main(int argc, const char* argv[]) {
      format = RAW_YUV;
    } else if (!strcmp(argv[c], "-pixel_format") && c < argc - 1) {
      const char* const fmt = argv[++c];
-      if      (!strcmp(fmt, "RGB"))  format = RGB;
+      if (!strcmp(fmt, "RGB"))
-      else if (!strcmp(fmt, "RGBA")) format = RGBA;
+        format = RGB;
-      else if (!strcmp(fmt, "BGR"))  format = BGR;
+      else if (!strcmp(fmt, "RGBA"))
-      else if (!strcmp(fmt, "BGRA")) format = BGRA;
+        format = RGBA;
-      else if (!strcmp(fmt, "ARGB")) format = ARGB;
+      else if (!strcmp(fmt, "BGR"))
-      else if (!strcmp(fmt, "RGBA_4444")) format = RGBA_4444;
+        format = BGR;
-      else if (!strcmp(fmt, "RGB_565")) format = RGB_565;
+      else if (!strcmp(fmt, "BGRA"))
-      else if (!strcmp(fmt, "rgbA")) format = rgbA;
+        format = BGRA;
-      else if (!strcmp(fmt, "bgrA")) format = bgrA;
+      else if (!strcmp(fmt, "ARGB"))
-      else if (!strcmp(fmt, "Argb")) format = Argb;
+        format = ARGB;
-      else if (!strcmp(fmt, "rgbA_4444")) format = rgbA_4444;
+      else if (!strcmp(fmt, "RGBA_4444"))
-      else if (!strcmp(fmt, "YUV"))  format = YUV;
+        format = RGBA_4444;
-      else if (!strcmp(fmt, "YUVA")) format = YUVA;
+      else if (!strcmp(fmt, "RGB_565"))
        format = RGB_565;
      else if (!strcmp(fmt, "rgbA"))
        format = rgbA;
      else if (!strcmp(fmt, "bgrA"))
        format = bgrA;
      else if (!strcmp(fmt, "Argb"))
        format = Argb;
      else if (!strcmp(fmt, "rgbA_4444"))
        format = rgbA_4444;
      else if (!strcmp(fmt, "YUV"))
        format = YUV;
      else if (!strcmp(fmt, "YUVA"))
        format = YUVA;
      else {
        fprintf(stderr, "Can't parse pixel_format %s\n", fmt);
        parse_error = 1;
@@ -350,25 +361,52 @@ int main(int argc, const char* argv[]) {
        output_buffer->colorspace = MODE_YUVA;
        break;
      // forced modes:
-      case RGB: output_buffer->colorspace = MODE_RGB; break;
+      case RGB:
-      case RGBA: output_buffer->colorspace = MODE_RGBA; break;
+        output_buffer->colorspace = MODE_RGB;
-      case BGR: output_buffer->colorspace = MODE_BGR; break;
+        break;
-      case BGRA: output_buffer->colorspace = MODE_BGRA; break;
+      case RGBA:
-      case ARGB: output_buffer->colorspace = MODE_ARGB; break;
+        output_buffer->colorspace = MODE_RGBA;
-      case RGBA_4444: output_buffer->colorspace = MODE_RGBA_4444; break;
+        break;
-      case RGB_565: output_buffer->colorspace = MODE_RGB_565; break;
+      case BGR:
-      case rgbA: output_buffer->colorspace = MODE_rgbA; break;
+        output_buffer->colorspace = MODE_BGR;
-      case bgrA: output_buffer->colorspace = MODE_bgrA; break;
+        break;
-      case Argb: output_buffer->colorspace = MODE_Argb; break;
+      case BGRA:
-      case rgbA_4444: output_buffer->colorspace = MODE_rgbA_4444; break;
+        output_buffer->colorspace = MODE_BGRA;
-      case YUV: output_buffer->colorspace = MODE_YUV; break;
+        break;
-      case YUVA: output_buffer->colorspace = MODE_YUVA; break;
+      case ARGB:
-      default: goto Exit;
+        output_buffer->colorspace = MODE_ARGB;
        break;
      case RGBA_4444:
        output_buffer->colorspace = MODE_RGBA_4444;
        break;
      case RGB_565:
        output_buffer->colorspace = MODE_RGB_565;
        break;
      case rgbA:
        output_buffer->colorspace = MODE_rgbA;
        break;
      case bgrA:
        output_buffer->colorspace = MODE_bgrA;
        break;
      case Argb:
        output_buffer->colorspace = MODE_Argb;
        break;
      case rgbA_4444:
        output_buffer->colorspace = MODE_rgbA_4444;
        break;
      case YUV:
        output_buffer->colorspace = MODE_YUV;
        break;
      case YUVA:
        output_buffer->colorspace = MODE_YUVA;
        break;
      default:
        goto Exit;
    }
    if (use_external_memory > 0 && format >= RGB) {
-      external_buffer = AllocateExternalBuffer(&config, format,
+      external_buffer =
-                                               use_external_memory);
+          AllocateExternalBuffer(&config, format, use_external_memory);
      if (external_buffer == NULL) goto Exit;
    }
@@ -410,11 +448,12 @@ int main(int argc, const char* argv[]) {
              output_buffer->width, output_buffer->height,
              bitstream->has_alpha ? " (with alpha)" : "",
              kFormatType[bitstream->format]);
-      fprintf(stderr, "Nothing written; "
+      fprintf(stderr,
              "Nothing written; "
              "use -o flag to save the result as e.g. PNG.\n");
    }
  }
- Exit:
+Exit:
  WebPFreeDecBuffer(output_buffer);
  WebPFree((void*)external_buffer);
  WebPFree((void*)data);
--- a/examples/example_util.c
+++ b/examples/example_util.c
@@ -110,8 +110,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
    }
    argc = 0;
-    for (cur = strtok((char*)args->argv_data.bytes, sep);
+    for (cur = strtok((char*)args->argv_data.bytes, sep); cur != NULL;
         cur != NULL;
         cur = strtok(NULL, sep)) {
      if (argc == MAX_ARGC) {
        fprintf(stderr, "ERROR: Arguments limit %d reached\n", MAX_ARGC);
--- a/examples/example_util.h
+++ b/examples/example_util.h
@@ -13,8 +13,8 @@
 #ifndef WEBP_EXAMPLES_EXAMPLE_UTIL_H_
 #define WEBP_EXAMPLES_EXAMPLE_UTIL_H_
 #include "webp/types.h"
 #include "webp/mux_types.h"
 #include "webp/types.h"
 #ifdef __cplusplus
 extern "C" {
--- a/examples/gif2webp.c
+++ b/examples/gif2webp.c
@@ -28,14 +28,15 @@
 #endif
 #include <gif_lib.h>
-#include "sharpyuv/sharpyuv.h"
+
 #include "webp/encode.h"
 #include "webp/mux.h"
 #include "../examples/example_util.h"
 #include "../imageio/imageio_util.h"
 #include "./gifdec.h"
 #include "./unicode.h"
 #include "./unicode_gif.h"
 #include "sharpyuv/sharpyuv.h"
 #include "webp/encode.h"
 #include "webp/mux.h"
 #if !defined(STDIN_FILENO)
 #define STDIN_FILENO 0
@@ -47,8 +48,7 @@ static int transparent_index = GIF_INDEX_INVALID;  // Opaque by default.
 static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
-  "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
+    "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};
 };
 static const char* ErrorString(WebPMuxError err) {
  assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
@@ -69,17 +69,22 @@ static void Help(void) {
  printf("Options:\n");
  printf("  -h / -help ............. this help\n");
  printf("  -lossy ................. encode image using lossy compression\n");
-  printf("  -mixed ................. for each frame in the image, pick lossy\n"
+  printf(
      "  -mixed ................. for each frame in the image, pick lossy\n"
      "                           or lossless compression heuristically\n");
-  printf("  -near_lossless <int> ... use near-lossless image preprocessing\n"
+  printf(
      "  -near_lossless <int> ... use near-lossless image preprocessing\n"
      "                           (0..100=off), default=100\n");
-  printf("  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
+  printf(
      "  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
      "conversion\n"
      "                           (lossy only)\n");
  printf("  -q <float> ............. quality factor (0:small..100:big)\n");
-  printf("  -m <int> ............... compression method (0=fast, 6=slowest), "
+  printf(
      "  -m <int> ............... compression method (0=fast, 6=slowest), "
      "default=4\n");
-  printf("  -min_size .............. minimize output size (default:off)\n"
+  printf(
      "  -min_size .............. minimize output size (default:off)\n"
      "                           lossless compression by default; can be\n"
      "                           combined with -q, -m, -lossy or -mixed\n"
      "                           options\n");
@@ -109,7 +114,7 @@ int main(int argc, const char* argv[]) {
  int gif_error = GIF_ERROR;
  WebPMuxError err = WEBP_MUX_OK;
  int ok = 0;
-  const W_CHAR* in_file = NULL, *out_file = NULL;
+  const W_CHAR *in_file = NULL, *out_file = NULL;
  GifFileType* gif = NULL;
  int frame_duration = 0;
  int frame_timestamp = 0;
@@ -198,10 +203,10 @@ int main(int argc, const char* argv[]) {
        const char* option;
        int flag;
      } kTokens[] = {
-        { "all",  METADATA_ALL },
+          {"all", METADATA_ALL},
-        { "none", 0 },
+          {"none", 0},
-        { "icc",  METADATA_ICC },
+          {"icc", METADATA_ICC},
-        { "xmp",  METADATA_XMP },
+          {"xmp", METADATA_XMP},
      };
      const size_t kNumTokens = sizeof(kTokens) / sizeof(*kTokens);
      const char* start = argv[++c];
@@ -319,8 +324,8 @@ int main(int argc, const char* argv[]) {
              goto End;
            }
            if (verbose) {
-              printf("Fixed canvas screen dimension to: %d x %d\n",
+              printf("Fixed canvas screen dimension to: %d x %d\n", gif->SWidth,
-                     gif->SWidth, gif->SHeight);
+                     gif->SHeight);
            }
          }
          // Allocate current buffer.
@@ -555,8 +560,10 @@ int main(int argc, const char* argv[]) {
    err = WebPMuxAssemble(mux, &webp_data);
    if (err != WEBP_MUX_OK) {
-      fprintf(stderr, "ERROR (%s): Could not assemble when re-muxing to add "
+      fprintf(stderr,
-              "loop count/metadata.\n", ErrorString(err));
+              "ERROR (%s): Could not assemble when re-muxing to add "
              "loop count/metadata.\n",
              ErrorString(err));
      goto End;
    }
  }
@@ -569,8 +576,7 @@ int main(int argc, const char* argv[]) {
    }
    if (!quiet) {
      if (!WSTRCMP(out_file, "-")) {
-        fprintf(stderr, "Saved %d bytes to STDIO\n",
+        fprintf(stderr, "Saved %d bytes to STDIO\n", (int)webp_data.size);
                (int)webp_data.size);
      } else {
        WFPRINTF(stderr, "Saved output file (%d bytes): %s\n",
                 (int)webp_data.size, out_file);
@@ -578,8 +584,10 @@ int main(int argc, const char* argv[]) {
    }
  } else {
    if (!quiet) {
-      fprintf(stderr, "Nothing written; use -o flag to save the result "
+      fprintf(stderr,
-                      "(%d bytes).\n", (int)webp_data.size);
+              "Nothing written; use -o flag to save the result "
              "(%d bytes).\n",
              (int)webp_data.size);
    }
  }
@@ -587,7 +595,7 @@ int main(int argc, const char* argv[]) {
  ok = 1;
  gif_error = GIF_OK;
- End:
+End:
  WebPDataClear(&icc_data);
  WebPDataClear(&xmp_data);
  WebPMuxDelete(mux);
@@ -601,7 +609,7 @@ int main(int argc, const char* argv[]) {
    GIFDisplayError(gif, gif_error);
  }
  if (gif != NULL) {
-#if LOCAL_GIF_PREREQ(5,1)
+#if LOCAL_GIF_PREREQ(5, 1)
    DGifCloseFile(gif, &gif_error);
 #else
    DGifCloseFile(gif);
--- a/examples/gifdec.c
+++ b/examples/gifdec.c
@@ -19,8 +19,8 @@
 #include <string.h>
 #include "webp/encode.h"
 #include "webp/types.h"
 #include "webp/mux_types.h"
 #include "webp/types.h"
 #define GIF_TRANSPARENT_COLOR 0x00000000u
 #define GIF_WHITE_COLOR 0xffffffffu
@@ -32,9 +32,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-extern void WebPCopyPlane(const uint8_t* src, int src_stride,
+extern void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
-                          uint8_t* dst, int dst_stride,
+                          int dst_stride, int width, int height);
                          int width, int height);
 extern void WebPCopyPixels(const WebPPicture* const src,
                           WebPPicture* const dst);
 #ifdef __cplusplus
@@ -47,18 +46,16 @@ void GIFGetBackgroundColor(const ColorMapObject* const color_map,
  if (transparent_index != GIF_INDEX_INVALID &&
      bgcolor_index == transparent_index) {
    *bgcolor = GIF_TRANSPARENT_COLOR;  // Special case.
-  } else if (color_map == NULL || color_map->Colors == NULL
+  } else if (color_map == NULL || color_map->Colors == NULL ||
-             || bgcolor_index >= color_map->ColorCount) {
+             bgcolor_index >= color_map->ColorCount) {
    *bgcolor = GIF_WHITE_COLOR;
    fprintf(stderr,
            "GIF decode warning: invalid background color index. Assuming "
            "white background.\n");
  } else {
    const GifColorType color = color_map->Colors[bgcolor_index];
-    *bgcolor = (0xffu       << 24)
+    *bgcolor = (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
-             | (color.Red   << 16)
+               (color.Blue << 0);
             | (color.Green <<  8)
             | (color.Blue  <<  0);
  }
 }
@@ -117,9 +114,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  const GifImageDesc* const image_desc = &gif->Image;
  uint32_t* dst = NULL;
  uint8_t* tmp = NULL;
-  const GIFFrameRect rect = {
+  const GIFFrameRect rect = {image_desc->Left, image_desc->Top,
-      image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height
+                             image_desc->Width, image_desc->Height};
  };
  const uint64_t memory_needed = 4 * rect.width * (uint64_t)rect.height;
  int ok = 0;
  *gif_rect = rect;
@@ -130,8 +126,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  }
  // Use a view for the sub-picture:
-  if (!WebPPictureView(picture, rect.x_offset, rect.y_offset,
+  if (!WebPPictureView(picture, rect.x_offset, rect.y_offset, rect.width,
-                       rect.width, rect.height, &sub_image)) {
+                       rect.height, &sub_image)) {
    fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n",
            rect.width, rect.height, rect.x_offset, rect.y_offset);
    return 0;
@@ -143,8 +139,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  if (image_desc->Interlace) {  // Interlaced image.
    // We need 4 passes, with the following offsets and jumps.
-    const int interlace_offsets[] = { 0, 4, 2, 1 };
+    const int interlace_offsets[] = {0, 4, 2, 1};
-    const int interlace_jumps[]   = { 8, 8, 4, 2 };
+    const int interlace_jumps[] = {8, 8, 4, 2};
    int pass;
    for (pass = 0; pass < 4; ++pass) {
      const size_t stride = (size_t)sub_image.argb_stride;
@@ -166,7 +162,7 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  }
  ok = 1;
- End:
+End:
  if (!ok) picture->error_code = sub_image.error_code;
  WebPPictureFree(&sub_image);
  WebPFree(tmp);
@@ -220,8 +216,7 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
    if (tmp == NULL) {
      return 0;
    }
-    memcpy((void*)(tmp + metadata->size),
+    memcpy((void*)(tmp + metadata->size), subblock.bytes, subblock.size);
           subblock.bytes, subblock.size);
    metadata->bytes = tmp;
    metadata->size += subblock.size;
  }
@@ -235,8 +230,8 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
  return 1;
 }
-static void ClearRectangle(WebPPicture* const picture,
+static void ClearRectangle(WebPPicture* const picture, int left, int top,
-                           int left, int top, int width, int height) {
+                           int width, int height) {
  int i, j;
  const size_t stride = picture->argb_stride;
  uint32_t* dst = picture->argb + top * stride + left;
@@ -247,8 +242,8 @@ static void ClearRectangle(WebPPicture* const picture,
 void GIFClearPic(WebPPicture* const pic, const GIFFrameRect* const rect) {
  if (rect != NULL) {
-    ClearRectangle(pic, rect->x_offset, rect->y_offset,
+    ClearRectangle(pic, rect->x_offset, rect->y_offset, rect->width,
-                   rect->width, rect->height);
+                   rect->height);
  } else {
    ClearRectangle(pic, 0, 0, pic->width, pic->height);
  }
@@ -266,15 +261,14 @@ void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
    GIFClearPic(curr_canvas, rect);
  } else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
    const size_t src_stride = prev_canvas->argb_stride;
-    const uint32_t* const src = prev_canvas->argb + rect->x_offset
+    const uint32_t* const src =
-                              + rect->y_offset * src_stride;
+        prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
    const size_t dst_stride = curr_canvas->argb_stride;
-    uint32_t* const dst = curr_canvas->argb + rect->x_offset
+    uint32_t* const dst =
-                        + rect->y_offset * dst_stride;
+        curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
    assert(prev_canvas != NULL);
-    WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride),
+    WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride), (uint8_t*)dst,
-                  (uint8_t*)dst, (int)(4 * dst_stride),
+                  (int)(4 * dst_stride), 4 * rect->width, rect->height);
                  4 * rect->width, rect->height);
  }
 }
@@ -297,11 +291,11 @@ void GIFBlendFrames(const WebPPicture* const src,
 void GIFDisplayError(const GifFileType* const gif, int gif_error) {
  // libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
-#if LOCAL_GIF_PREREQ(4,2)
+#if LOCAL_GIF_PREREQ(4, 2)
-#if LOCAL_GIF_PREREQ(5,0)
+#if LOCAL_GIF_PREREQ(5, 0)
  // Static string actually, hence the const char* cast.
-  const char* error_str = (const char*)GifErrorString(
+  const char* error_str =
-      (gif == NULL) ? gif_error : gif->Error);
+      (const char*)GifErrorString((gif == NULL) ? gif_error : gif->Error);
 #else
  const char* error_str = (const char*)GifErrorString();
  (void)gif;
@@ -319,7 +313,8 @@ void GIFDisplayError(const GifFileType* const gif, int gif_error) {
 #else  // !WEBP_HAVE_GIF
 static void ErrorGIFNotAvailable(void) {
-  fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
+  fprintf(stderr,
          "GIF support not compiled. Please install the libgif-dev "
          "package before building.\n");
 }
--- a/examples/gifdec.h
+++ b/examples/gifdec.h
@@ -30,12 +30,11 @@ extern "C" {
 // GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
 #if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
-# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
-# define LOCAL_GIF_PREREQ(maj, min) \
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
    (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
 #else
-# define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_VERSION 0
-# define LOCAL_GIF_PREREQ(maj, min) 0
+#define LOCAL_GIF_PREREQ(maj, min) 0
 #endif
 #define GIF_INDEX_INVALID (-1)
--- a/examples/img2webp.c
+++ b/examples/img2webp.c
@@ -43,14 +43,18 @@ static void Help(void) {
  printf("File-level options (only used at the start of compression):\n");
  printf(" -min_size ............ minimize size\n");
-  printf(" -kmax <int> .......... maximum number of frame between key-frames\n"
+  printf(
      " -kmax <int> .......... maximum number of frame between key-frames\n"
      "                        (0=only keyframes)\n");
-  printf(" -kmin <int> .......... minimum number of frame between key-frames\n"
+  printf(
      " -kmin <int> .......... minimum number of frame between key-frames\n"
      "                        (0=disable key-frames altogether)\n");
  printf(" -mixed ............... use mixed lossy/lossless automatic mode\n");
-  printf(" -near_lossless <int> . use near-lossless image preprocessing\n"
+  printf(
      " -near_lossless <int> . use near-lossless image preprocessing\n"
      "                        (0..100=off), default=100\n");
-  printf(" -sharp_yuv ........... use sharper (and slower) RGB->YUV "
+  printf(
      " -sharp_yuv ........... use sharper (and slower) RGB->YUV "
      "conversion\n                        "
      "(lossy only)\n");
  printf(" -loop <int> .......... loop count (default: 0, = infinite loop)\n");
@@ -64,19 +68,24 @@ static void Help(void) {
  printf(" -lossless ............ use lossless mode (default)\n");
  printf(" -lossy ............... use lossy mode\n");
  printf(" -q <float> ........... quality\n");
-  printf(" -m <int> ............. compression method (0=fast, 6=slowest), "
+  printf(
      " -m <int> ............. compression method (0=fast, 6=slowest), "
      "default=4\n");
-  printf(" -exact, -noexact ..... preserve or alter RGB values in transparent "
+  printf(
      " -exact, -noexact ..... preserve or alter RGB values in transparent "
      "area\n"
      "                        (default: -noexact, may cause artifacts\n"
      "                                  with lossy animations)\n");
  printf("\n");
-  printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
+  printf(
      "example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
      "                  -d 80 in2.tiff -o out.webp\n");
-  printf("\nNote: if a single file name is passed as the argument, the "
+  printf(
      "\nNote: if a single file name is passed as the argument, the "
      "arguments will be\n");
-  printf("tokenized from this file. The file name must not start with "
+  printf(
      "tokenized from this file. The file name must not start with "
      "the character '-'.\n");
  printf("\nSupported input formats:\n  %s\n",
         WebPGetEnabledInputFileFormats());
@@ -127,7 +136,7 @@ static int SetLoopCount(int loop_count, WebPData* const webp_data) {
    ok = (err == WEBP_MUX_OK);
  }
- End:
+End:
  WebPMuxDelete(mux);
  if (!ok) {
    fprintf(stderr, "Error during loop-count setting\n");
@@ -166,8 +175,7 @@ int main(int argc, const char* argv[]) {
  argv = cmd_args.argv;
  WebPDataInit(&webp_data);
-  if (!WebPAnimEncoderOptionsInit(&anim_config) ||
+  if (!WebPAnimEncoderOptionsInit(&anim_config) || !WebPConfigInit(&config) ||
      !WebPConfigInit(&config) ||
      !WebPPictureInit(&pic)) {
    fprintf(stderr, "Library version mismatch!\n");
    ok = 0;
@@ -298,7 +306,8 @@ int main(int argc, const char* argv[]) {
    if (ok) {
      ok = (width == pic.width && height == pic.height);
      if (!ok) {
-        fprintf(stderr, "Frame #%d dimension mismatched! "
+        fprintf(stderr,
                "Frame #%d dimension mismatched! "
                "Got %d x %d. Was expecting %d x %d.\n",
                pic_num, pic.width, pic.height, width, height);
      }
@@ -314,8 +323,8 @@ int main(int argc, const char* argv[]) {
    if (!ok) goto End;
    if (verbose) {
-      WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n",
+      WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n", pic_num,
-               pic_num, timestamp_ms, GET_WARGV_SHIFTED(argv, c));
+               timestamp_ms, GET_WARGV_SHIFTED(argv, c));
    }
    timestamp_ms += duration;
    ++pic_num;
@@ -323,8 +332,10 @@ int main(int argc, const char* argv[]) {
  for (c = last_input_index + 1; c < argc; ++c) {
    if (argv[c] != NULL) {
-      fprintf(stderr, "Warning: unused option [%s]!"
+      fprintf(stderr,
-                      " Frame options go before the input frame.\n", argv[c]);
+              "Warning: unused option [%s]!"
              " Frame options go before the input frame.\n",
              argv[c]);
    }
  }
@@ -335,7 +346,7 @@ int main(int argc, const char* argv[]) {
    fprintf(stderr, "Error during final animation assembly.\n");
  }
- End:
+End:
  // free resources
  WebPAnimEncoderDelete(enc);
@@ -353,8 +364,8 @@ int main(int argc, const char* argv[]) {
  }
  if (ok) {
-    fprintf(stderr, "[%d frames, %u bytes].\n",
+    fprintf(stderr, "[%d frames, %u bytes].\n", pic_num,
-            pic_num, (unsigned int)webp_data.size);
+            (unsigned int)webp_data.size);
  }
  WebPDataClear(&webp_data);
  ExUtilDeleteCommandLineArguments(&cmd_args);
--- a/examples/stopwatch.h
+++ b/examples/stopwatch.h
@@ -28,16 +28,12 @@ static WEBP_INLINE void StopwatchReset(Stopwatch* watch) {
 static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
  const LARGE_INTEGER old_value = *watch;
  LARGE_INTEGER freq;
-  if (!QueryPerformanceCounter(watch))
+  if (!QueryPerformanceCounter(watch)) return 0.0;
-    return 0.0;
+  if (!QueryPerformanceFrequency(&freq)) return 0.0;
-  if (!QueryPerformanceFrequency(&freq))
+  if (freq.QuadPart == 0) return 0.0;
    return 0.0;
  if (freq.QuadPart == 0)
    return 0.0;
  return (watch->QuadPart - old_value.QuadPart) / (double)freq.QuadPart;
 }
 #else                /* !_WIN32 */
 #include <string.h>  // memcpy
 #include <sys/time.h>
--- a/examples/unicode.h
+++ b/examples/unicode.h
@@ -25,9 +25,9 @@
 #include <fcntl.h>
 #include <io.h>
 #include <shellapi.h>
 #include <wchar.h>
 #include <windows.h>
 #include <shellapi.h>
 // Create a wchar_t array containing Unicode parameters.
 #define INIT_WARGV(ARGC, ARGV)                                       \
--- a/examples/unicode_gif.h
+++ b/examples/unicode_gif.h
@@ -28,6 +28,7 @@
 #include <gif_lib.h>
 #include <string.h>
 #include "./gifdec.h"
 #if !defined(STDIN_FILENO)
--- a/examples/vwebp.c
+++ b/examples/vwebp.c
@@ -38,12 +38,11 @@
 #include <qcms.h>
 #endif
 #include "webp/decode.h"
 #include "webp/demux.h"
 #include "../examples/example_util.h"
 #include "../imageio/imageio_util.h"
 #include "./unicode.h"
 #include "webp/decode.h"
 #include "webp/demux.h"
 #if defined(_MSC_VER) && _MSC_VER < 1900
 #define snprintf _snprintf
@@ -135,9 +134,8 @@ static int ApplyColorProfile(const WebPData* const profile,
  }
  qcms_profile_precache_output_transform(output_profile);
-  transform = qcms_transform_create(input_profile, input_type,
+  transform = qcms_transform_create(input_profile, input_type, output_profile,
-                                    output_profile, output_type,
+                                    output_type, intent);
                                    intent);
  if (transform == NULL) {
    fprintf(stderr, "Error creating color transform!\n");
    goto Error;
@@ -149,7 +147,7 @@ static int ApplyColorProfile(const WebPData* const profile,
  }
  ok = 1;
- Error:
+Error:
  if (input_profile != NULL) qcms_profile_release(input_profile);
  if (output_profile != NULL) qcms_profile_release(output_profile);
  if (transform != NULL) qcms_transform_release(transform);
@@ -172,8 +170,8 @@ static int Decode(void) {   // Fills kParams.curr_frame
  ClearPreviousPic();
  output_buffer->colorspace = MODE_RGBA;
-  ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size,
+  ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size, config) ==
-                   config) == VP8_STATUS_OK);
+        VP8_STATUS_OK);
  if (!ok) {
    fprintf(stderr, "Decoding of frame #%d failed!\n", curr->frame_num);
  } else {
@@ -341,8 +339,7 @@ static void DrawBackground(void) {
    glPushMatrix();
    glLoadIdentity();
    glColor4f(GetColorf(kParams.bg_color, 16),  // BGRA from spec
-              GetColorf(kParams.bg_color, 8),
+              GetColorf(kParams.bg_color, 8), GetColorf(kParams.bg_color, 0),
              GetColorf(kParams.bg_color, 0),
              GetColorf(kParams.bg_color, 24));
    glRecti(-1, -1, +1, +1);
    glPopMatrix();
@@ -402,8 +399,7 @@ static void HandleDisplay(void) {
  *prev = *curr;
-  glDrawPixels(pic->width, pic->height,
+  glDrawPixels(pic->width, pic->height, GL_RGBA, GL_UNSIGNED_BYTE,
               GL_RGBA, GL_UNSIGNED_BYTE,
               (GLvoid*)pic->u.RGBA.rgba);
  if (kParams.print_info) {
    char tmp[32];
@@ -417,8 +413,8 @@ static void HandleDisplay(void) {
    glRasterPos2f(-0.95f, 0.80f);
    PrintString(tmp);
    if (curr->x_offset != 0 || curr->y_offset != 0) {
-      snprintf(tmp, sizeof(tmp), " (offset:%d,%d)",
+      snprintf(tmp, sizeof(tmp), " (offset:%d,%d)", curr->x_offset,
-               curr->x_offset, curr->y_offset);
+               curr->y_offset);
      glRasterPos2f(-0.95f, 0.70f);
      PrintString(tmp);
    }
@@ -571,8 +567,8 @@ int main(int argc, char* argv[]) {
    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }
-  if (!ImgIoUtilReadFile(kParams.file_name,
+  if (!ImgIoUtilReadFile(kParams.file_name, &kParams.data.bytes,
-                         &kParams.data.bytes, &kParams.data.size)) {
+                         &kParams.data.size)) {
    goto Error;
  }
@@ -603,7 +599,8 @@ int main(int argc, char* argv[]) {
    if (!WebPDemuxGetChunk(kParams.dmux, "ICCP", 1, &kParams.iccp)) goto Error;
    printf("VP8X: Found color profile\n");
 #else
-    fprintf(stderr, "Warning: color profile present, but qcms is unavailable!\n"
+    fprintf(stderr,
            "Warning: color profile present, but qcms is unavailable!\n"
            "Build libqcms from Mozilla or Chromium and define WEBP_HAVE_QCMS "
            "before building.\n");
 #endif
@@ -614,8 +611,8 @@ int main(int argc, char* argv[]) {
  kParams.has_animation = (curr->num_frames > 1);
  kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT);
  kParams.bg_color = WebPDemuxGetI(kParams.dmux, WEBP_FF_BACKGROUND_COLOR);
-  printf("VP8X: Found %d images in file (loop count = %d)\n",
+  printf("VP8X: Found %d images in file (loop count = %d)\n", curr->num_frames,
-         curr->num_frames, kParams.loop_count);
+         kParams.loop_count);
  // Decode first frame
  if (!Decode()) goto Error;
@@ -645,7 +642,7 @@ int main(int argc, char* argv[]) {
  ClearParams();
  FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
- Error:
+Error:
  ClearParams();
  FREE_WARGV_AND_RETURN(EXIT_FAILURE);
 }
--- a/examples/webpinfo.c
+++ b/examples/webpinfo.c
@@ -47,25 +47,13 @@
    ++webp_info->num_warnings;                   \
  } while (0)
-static const char* const kFormats[3] = {
+static const char* const kFormats[3] = {"Unknown", "Lossy", "Lossless"};
  "Unknown",
  "Lossy",
  "Lossless"
 };
 static const char* const kLosslessTransforms[4] = {
-  "Predictor",
+    "Predictor", "Cross Color", "Subtract Green", "Color Indexing"};
  "Cross Color",
  "Subtract Green",
  "Color Indexing"
 };
-static const char* const kAlphaFilterMethods[4] = {
+static const char* const kAlphaFilterMethods[4] = {"None", "Horizontal",
-  "None",
+                                                   "Vertical", "Gradient"};
  "Horizontal",
  "Vertical",
  "Gradient"
 };
 typedef enum {
  WEBP_INFO_OK = 0,
@@ -129,15 +117,15 @@ static void WebPInfoInit(WebPInfo* const webp_info) {
 }
 static const uint32_t kWebPChunkTags[CHUNK_TYPES] = {
-  MKFOURCC('V', 'P', '8', ' '),
+    MKFOURCC('V', 'P', '8', ' '),  //
-  MKFOURCC('V', 'P', '8', 'L'),
+    MKFOURCC('V', 'P', '8', 'L'),  //
-  MKFOURCC('V', 'P', '8', 'X'),
+    MKFOURCC('V', 'P', '8', 'X'),  //
-  MKFOURCC('A', 'L', 'P', 'H'),
+    MKFOURCC('A', 'L', 'P', 'H'),  //
-  MKFOURCC('A', 'N', 'I', 'M'),
+    MKFOURCC('A', 'N', 'I', 'M'),  //
-  MKFOURCC('A', 'N', 'M', 'F'),
+    MKFOURCC('A', 'N', 'M', 'F'),  //
-  MKFOURCC('I', 'C', 'C', 'P'),
+    MKFOURCC('I', 'C', 'C', 'P'),  //
-  MKFOURCC('E', 'X', 'I', 'F'),
+    MKFOURCC('E', 'X', 'I', 'F'),  //
-  MKFOURCC('X', 'M', 'P', ' '),
+    MKFOURCC('X', 'M', 'P', ' '),  //
 };
 // -----------------------------------------------------------------------------
@@ -200,9 +188,7 @@ static const uint8_t* GetBuffer(MemBuffer* const mem) {
  return mem->buf + mem->start;
 }
-static void Skip(MemBuffer* const mem, size_t size) {
+static void Skip(MemBuffer* const mem, size_t size) { mem->start += size; }
  mem->start += size;
 }
 static uint32_t ReadMemBufLE32(MemBuffer* const mem) {
  const uint8_t* const data = mem->buf + mem->start;
@@ -266,7 +252,8 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
    int update_map, update_data;
    GET_BITS(update_map, 1);
    GET_BITS(update_data, 1);
-    printf("  Update map:       %d\n"
+    printf(
        "  Update map:       %d\n"
        "  Update data:      %d\n",
        update_map, update_data);
    if (update_data) {
@@ -298,8 +285,8 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
        GET_BITS(bit, 1);
        if (bit) GET_BITS(prob_segment[i], 8);
      }
-      printf("  Prob segment:     %d %d %d\n",
+      printf("  Prob segment:     %d %d %d\n", prob_segment[0], prob_segment[1],
-             prob_segment[0], prob_segment[1], prob_segment[2]);
+             prob_segment[2]);
    }
  }
  return WEBP_INFO_OK;
@@ -371,7 +358,8 @@ static WebPInfoStatus ParseLossyHeader(const ChunkData* const chunk_data,
      LOG_ERROR("Invalid lossy bitstream signature.");
      return WEBP_INFO_BITSTREAM_ERROR;
    }
-    printf("  Width:            %d\n"
+    printf(
        "  Width:            %d\n"
        "  X scale:          %d\n"
        "  Height:           %d\n"
        "  Y scale:          %d\n",
@@ -499,7 +487,8 @@ static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
        n_colors += 1;
        printf("  No. of colors:    %d\n", n_colors);
        break;
-      default: break;
+      default:
        break;
    }
  }
  return WEBP_INFO_OK;
@@ -556,8 +545,8 @@ static WebPInfoStatus ParseAlphaHeader(const ChunkData* const chunk_data,
    const int pre_processing = (data[0] >> 4) & 0x03;
    const int reserved_bits = (data[0] >> 6) & 0x03;
    printf("  Compression:      %d\n", compression_method);
-    printf("  Filter:           %s (%d)\n",
+    printf("  Filter:           %s (%d)\n", kAlphaFilterMethods[filter],
-           kAlphaFilterMethods[filter], filter);
+           filter);
    printf("  Pre-processing:   %d\n", pre_processing);
    if (compression_method > ALPHA_LOSSLESS_COMPRESSION) {
      LOG_ERROR("Invalid Alpha compression method.");
@@ -642,7 +631,7 @@ static WebPInfoStatus ParseChunk(const WebPInfo* const webp_info,
      LOG_ERROR("Size of chunk payload is over limit.");
      return WEBP_INFO_INVALID_PARAM;
    }
-    if (payload_size_padded > MemDataSize(mem)){
+    if (payload_size_padded > MemDataSize(mem)) {
      LOG_ERROR("Truncated data detected when parsing chunk payload.");
      return WEBP_INFO_TRUNCATED_DATA;
    }
@@ -695,8 +684,8 @@ static WebPInfoStatus ProcessVP8XChunk(const ChunkData* const chunk_data,
           (webp_info->feature_flags & EXIF_FLAG) != 0,
           (webp_info->feature_flags & XMP_FLAG) != 0,
           (webp_info->feature_flags & ANIMATION_FLAG) != 0);
-    printf("  Canvas size %d x %d\n",
+    printf("  Canvas size %d x %d\n", webp_info->canvas_width,
-           webp_info->canvas_width, webp_info->canvas_height);
+           webp_info->canvas_height);
  }
  if (webp_info->canvas_width > MAX_CANVAS_SIZE) {
    LOG_WARN("Canvas width is out of range in VP8X chunk.");
@@ -727,10 +716,8 @@ static WebPInfoStatus ProcessANIMChunk(const ChunkData* const chunk_data,
  ++webp_info->chunk_counts[CHUNK_ANIM];
  if (!webp_info->quiet) {
    printf("  Background color:(ARGB) %02x %02x %02x %02x\n",
-           (webp_info->bgcolor >> 24) & 0xff,
+           (webp_info->bgcolor >> 24) & 0xff, (webp_info->bgcolor >> 16) & 0xff,
-           (webp_info->bgcolor >> 16) & 0xff,
+           (webp_info->bgcolor >> 8) & 0xff, webp_info->bgcolor & 0xff);
           (webp_info->bgcolor >> 8) & 0xff,
           webp_info->bgcolor & 0xff);
    printf("  Loop count      : %d\n", webp_info->loop_count);
  }
  if (webp_info->loop_count > MAX_LOOP_COUNT) {
@@ -765,7 +752,8 @@ static WebPInfoStatus ProcessANMFChunk(const ChunkData* const chunk_data,
  blend = (temp >> 1) & 1;
  ++webp_info->chunk_counts[CHUNK_ANMF];
  if (!webp_info->quiet) {
-    printf("  Offset_X: %d\n  Offset_Y: %d\n  Width: %d\n  Height: %d\n"
+    printf(
        "  Offset_X: %d\n  Offset_Y: %d\n  Width: %d\n  Height: %d\n"
        "  Duration: %d\n  Dispose: %d\n  Blend: %d\n",
        offset_x, offset_y, width, height, duration, dispose, blend);
  }
@@ -804,7 +792,8 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
  }
  if (!webp_info->quiet) {
    assert(features.format >= 0 && features.format <= 2);
-    printf("  Width: %d\n  Height: %d\n  Alpha: %d\n  Animation: %d\n"
+    printf(
        "  Width: %d\n  Height: %d\n  Alpha: %d\n  Animation: %d\n"
        "  Format: %s (%d)\n",
        features.width, features.height, features.has_alpha,
        features.has_animation, kFormats[features.format], features.format);
@@ -831,8 +820,7 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
      LOG_ERROR("Multiple VP8/VP8L chunks detected.");
      return WEBP_INFO_PARSE_ERROR;
    }
-    if (chunk_data->id == CHUNK_VP8L &&
+    if (chunk_data->id == CHUNK_VP8L && webp_info->chunk_counts[CHUNK_ALPHA]) {
        webp_info->chunk_counts[CHUNK_ALPHA]) {
      LOG_WARN("Both VP8L and ALPH chunks are detected.");
    }
    if (webp_info->chunk_counts[CHUNK_ANIM] ||
@@ -882,7 +870,8 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
    webp_info->seen_alpha_subchunk = 1;
    if (webp_info->seen_image_subchunk) {
-      LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
+      LOG_ERROR(
          "ALPHA sub-chunk detected after VP8 sub-chunk "
          "in an ANMF chunk.");
      return WEBP_INFO_PARSE_ERROR;
    }
@@ -949,9 +938,8 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
               ((fourcc << 8) & 0xff0000) | (fourcc << 24);
 #endif
      memcpy(tag, &fourcc, sizeof(tag));
-      printf("Chunk %c%c%c%c at offset %6d, length %6d\n",
+      printf("Chunk %c%c%c%c at offset %6d, length %6d\n", tag[0], tag[1],
-             tag[0], tag[1], tag[2], tag[3], (int)chunk_data->offset,
+             tag[2], tag[3], (int)chunk_data->offset, (int)chunk_data->size);
             (int)chunk_data->size);
    }
  }
  switch (id) {
@@ -1060,14 +1048,14 @@ static void ShowSummary(const WebPInfo* const webp_info) {
  int i;
  printf("Summary:\n");
  printf("Number of frames: %d\n", webp_info->num_frames);
-  printf("Chunk type  :  VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
+  printf(
      "Chunk type  :  VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
      "EXIF  XMP\n");
  printf("Chunk counts: ");
  for (i = 0; i < CHUNK_TYPES; ++i) {
    printf("%4d ", webp_info->chunk_counts[i]);
    if (i == CHUNK_ANMF) {
-      printf("%4d %4d %4d  ",
+      printf("%4d %4d %4d  ", webp_info->anmf_subchunk_counts[0],
             webp_info->anmf_subchunk_counts[0],
             webp_info->anmf_subchunk_counts[1],
             webp_info->anmf_subchunk_counts[2]);
    }
@@ -1097,7 +1085,7 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
  //  Final check.
  webp_info_status = Validate(webp_info);
- Error:
+Error:
  if (!webp_info->quiet) {
    if (webp_info_status == WEBP_INFO_OK) {
      printf("No error detected.\n");
@@ -1112,7 +1100,8 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
 }
 static void Help(void) {
-  printf("Usage: webpinfo [options] in_files\n"
+  printf(
      "Usage: webpinfo [options] in_files\n"
      "Note: there could be multiple input files;\n"
      "      options must come before input files.\n"
      "Options:\n"
@@ -1153,8 +1142,8 @@ int main(int argc, const char* argv[]) {
      parse_bitstream = 1;
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetDecoderVersion();
-      printf("WebP Decoder version: %d.%d.%d\n",
+      printf("WebP Decoder version: %d.%d.%d\n", (version >> 16) & 0xff,
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+             (version >> 8) & 0xff, version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {  // Assume the remaining are all input files.
      break;
--- a/examples/webpmux.c
+++ b/examples/webpmux.c
@@ -106,14 +106,11 @@ typedef enum {
  LAST_FEATURE
 } FeatureType;
-static const char* const kFourccList[LAST_FEATURE] = {
+static const char* const kFourccList[LAST_FEATURE] = {NULL, "EXIF", "XMP ",
-  NULL, "EXIF", "XMP ", "ICCP", "ANMF"
+                                                      "ICCP", "ANMF"};
 };
 static const char* const kDescriptions[LAST_FEATURE] = {
-  NULL, "EXIF metadata", "XMP metadata", "ICC profile",
+    NULL, "EXIF metadata", "XMP metadata", "ICC profile", "Animation frame"};
  "Animation frame"
 };
 typedef struct {
  CommandLineArguments cmd_args;
@@ -144,8 +141,7 @@ static int CountOccurrences(const CommandLineArguments* const args,
 static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
-  "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
+    "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};
 };
 static const char* ErrorString(WebPMuxError err) {
  assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
@@ -222,8 +218,8 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
    WebPMuxAnimParams params;
    err = WebPMuxGetAnimationParams(mux, &params);
    assert(err == WEBP_MUX_OK);
-    printf("Background color : 0x%.8X  Loop Count : %d\n",
+    printf("Background color : 0x%.8X  Loop Count : %d\n", params.bgcolor,
-           params.bgcolor, params.loop_count);
+           params.loop_count);
    err = WebPMuxNumChunks(mux, id, &nFrames);
    assert(err == WEBP_MUX_OK);
@@ -255,9 +251,9 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
            printf("%8d %10s %5s ", frame.duration, dispose, blend);
          }
          printf("%10d %11s\n", (int)frame.bitstream.size,
-                 (features.format == 1) ? "lossy" :
+                 (features.format == 1)   ? "lossy"
-                 (features.format == 2) ? "lossless" :
+                 : (features.format == 2) ? "lossless"
-                                          "undefined");
+                                          : "undefined");
        }
        WebPDataClear(&frame.bitstream);
        RETURN_IF_ERROR3("Failed to retrieve %s#%d\n", type_str, i);
@@ -305,7 +301,8 @@ static void PrintHelp(void) {
  printf("       webpmux -duration DURATION_OPTIONS [-duration ...]\n");
  printf("               INPUT -o OUTPUT\n");
  printf("       webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT\n");
-  printf("       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
+  printf(
      "       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
      "\n");
  printf("               [-bgcolor BACKGROUND_COLOR] -o OUTPUT\n");
  printf("       webpmux -info INPUT\n");
@@ -360,7 +357,8 @@ static void PrintHelp(void) {
  printf("             'di' is the pause duration before next frame,\n");
  printf("             'xi','yi' specify the image offset for this frame,\n");
  printf("             'mi' is the dispose method for this frame (0 or 1),\n");
-  printf("             'bi' is the blending method for this frame (+b or -b)"
+  printf(
      "             'bi' is the blending method for this frame (+b or -b)"
      "\n");
  printf("\n");
@@ -372,9 +370,11 @@ static void PrintHelp(void) {
  printf("BACKGROUND_COLOR:\n");
  printf(" Background color of the canvas.\n");
  printf("  A,R,G,B\n");
-  printf("  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
+  printf(
      "  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
      "specifying\n");
-  printf("            the Alpha, Red, Green and Blue component values "
+  printf(
      "            the Alpha, Red, Green and Blue component values "
      "respectively\n");
  printf("            [Default: 255,255,255,255]\n");
@@ -382,17 +382,21 @@ static void PrintHelp(void) {
  printf("\nNote: The nature of EXIF, XMP and ICC data is not checked");
  printf(" and is assumed to be\nvalid.\n");
-  printf("\nNote: if a single file name is passed as the argument, the "
+  printf(
      "\nNote: if a single file name is passed as the argument, the "
      "arguments will be\n");
-  printf("tokenized from this file. The file name must not start with "
+  printf(
      "tokenized from this file. The file name must not start with "
      "the character '-'.\n");
 }
 static void WarnAboutOddOffset(const WebPMuxFrameInfo* const info) {
  if ((info->x_offset | info->y_offset) & 1) {
-    fprintf(stderr, "Warning: odd offsets will be snapped to even values"
+    fprintf(stderr,
-            " (%d, %d) -> (%d, %d)\n", info->x_offset, info->y_offset,
+            "Warning: odd offsets will be snapped to even values"
-            info->x_offset & ~1, info->y_offset & ~1);
+            " (%d, %d) -> (%d, %d)\n",
            info->x_offset, info->y_offset, info->x_offset & ~1,
            info->y_offset & ~1);
  }
 }
@@ -420,8 +424,8 @@ static int WriteData(const char* filename, const WebPData* const webpdata) {
  if (fwrite(webpdata->bytes, webpdata->size, 1, fout) != 1) {
    WFPRINTF(stderr, "Error writing file %s!\n", (const W_CHAR*)filename);
  } else {
-    WFPRINTF(stderr, "Saved file %s (%d bytes)\n",
+    WFPRINTF(stderr, "Saved file %s (%d bytes)\n", (const W_CHAR*)filename,
-             (const W_CHAR*)filename, (int)webpdata->size);
+             (int)webpdata->size);
    ok = 1;
  }
  if (fout != stdout) fclose(fout);
@@ -454,8 +458,8 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
  if (err == WEBP_MUX_OK) {
    err = WebPMuxSetAnimationParams(new_mux, &p);
    if (err != WEBP_MUX_OK) {
-      ERROR_GOTO2("Error (%s) handling animation params.\n",
+      ERROR_GOTO2("Error (%s) handling animation params.\n", ErrorString(err),
-                  ErrorString(err), End);
+                  End);
    }
  } else {
    /* it might not be an animation. Just keep moving. */
@@ -473,7 +477,7 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
    }
  }
- End:
+End:
  if (!ok) {
    WebPMuxDelete(new_mux);
    new_mux = NULL;
@@ -511,8 +515,7 @@ static int ParseFrameArgs(const char* args, WebPMuxFrameInfo* const info) {
  if (blend_method != 'b') return 0;
  if (plus_minus != '-' && plus_minus != '+') return 0;
-  info->blend_method =
+  info->blend_method = (plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
      (plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
  return 1;
 }
@@ -584,8 +587,10 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
  }
  if ((num_frame_args == 0) && (num_loop_args + num_bgcolor_args > 0)) {
-    ERROR_GOTO1("ERROR: Loop count and background color are relevant only in "
+    ERROR_GOTO1(
-                "case of animation.\n", ErrValidate);
+        "ERROR: Loop count and background color are relevant only in "
        "case of animation.\n",
        ErrValidate);
  }
  if (num_durations_args > 0 && num_frame_args != 0) {
    ERROR_GOTO1("ERROR: Can not combine -duration and -frame commands.\n",
@@ -603,7 +608,7 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
    *num_feature_args = num_frame_args + num_loop_args + num_bgcolor_args;
  }
- ErrValidate:
+ErrValidate:
  return ok;
 }
@@ -739,8 +744,8 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
        exit(0);
      } else if (!strcmp(argv[i], "-version")) {
        const int version = WebPGetMuxVersion();
-        printf("%d.%d.%d\n",
+        printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
-               (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+               version & 0xff);
        DeleteConfig(config);
        LOCAL_FREE((W_CHAR**)unicode_argv);
        exit(0);
@@ -771,8 +776,9 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
        if (!strcmp(argv[i], "icc") || !strcmp(argv[i], "exif") ||
            !strcmp(argv[i], "xmp")) {
          if (FEATURETYPE_IS_NIL) {
-            config->type = (!strcmp(argv[i], "icc")) ? FEATURE_ICCP :
+            config->type = (!strcmp(argv[i], "icc"))    ? FEATURE_ICCP
-                (!strcmp(argv[i], "exif")) ? FEATURE_EXIF : FEATURE_XMP;
+                           : (!strcmp(argv[i], "exif")) ? FEATURE_EXIF
                                                        : FEATURE_XMP;
          } else {
            ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
          }
@@ -826,7 +832,7 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
      }
    }
  }
- ErrParse:
+ErrParse:
  return ok;
 }
@@ -858,7 +864,7 @@ static int ValidateConfig(Config* const config) {
    ERROR_GOTO1("ERROR: No output file specified.\n", ErrValidate2);
  }
- ErrValidate2:
+ErrValidate2:
  return ok;
 }
@@ -889,7 +895,7 @@ static int InitializeConfig(int argc, const char* argv[], Config* const config,
    ERROR_GOTO1("Exiting due to command-line parsing error.\n", Err1);
  }
- Err1:
+Err1:
  return ok;
 }
@@ -921,8 +927,8 @@ static int GetFrame(const WebPMux* mux, const Config* config) {
  err = WebPMuxGetFrame(mux, num, &info);
  if (err == WEBP_MUX_OK && info.id != id) err = WEBP_MUX_NOT_FOUND;
  if (err != WEBP_MUX_OK) {
-    ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n",
+    ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n", ErrorString(err), num,
-                ErrorString(err), num, ErrGet);
+                ErrGet);
  }
  mux_single = WebPMuxNew();
@@ -939,7 +945,7 @@ static int GetFrame(const WebPMux* mux, const Config* config) {
  ok = WriteWebP(mux_single, config->output);
- ErrGet:
+ErrGet:
  WebPDataClear(&info.bitstream);
  WebPMuxDelete(mux_single);
  return ok && !parse_error;
@@ -966,8 +972,8 @@ static int Process(const Config* config) {
        case FEATURE_XMP:
          err = WebPMuxGetChunk(mux, kFourccList[config->type], &chunk);
          if (err != WEBP_MUX_OK) {
-            ERROR_GOTO3("ERROR (%s): Could not get the %s.\n",
+            ERROR_GOTO3("ERROR (%s): Could not get the %s.\n", ErrorString(err),
-                        ErrorString(err), kDescriptions[config->type], Err2);
+                        kDescriptions[config->type], Err2);
          }
          ok = WriteData(config->output, &chunk);
          break;
@@ -982,7 +988,7 @@ static int Process(const Config* config) {
      switch (config->type) {
        case FEATURE_ANMF: {
          int i;
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          mux = WebPMuxNew();
          if (mux == NULL) {
            ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n",
@@ -1008,8 +1014,10 @@ static int Process(const Config* config) {
                  // Note: This is only a 'necessary' condition for loop_count
                  // to be valid. The 'sufficient' condition is checked in
                  // WebPMuxSetAnimationParams() method called later.
-                  ERROR_GOTO1("ERROR: Loop count must be in the range 0 to "
+                  ERROR_GOTO1(
-                              "65535.\n", Err2);
+                      "ERROR: Loop count must be in the range 0 to "
                      "65535.\n",
                      Err2);
                }
                ok = !parse_error;
                if (!ok) goto Err2;
@@ -1031,8 +1039,10 @@ static int Process(const Config* config) {
                err = WebPMuxPushFrame(mux, &frame, 1);
                WebPDataClear(&frame.bitstream);
                if (err != WEBP_MUX_OK) {
-                  ERROR_GOTO3("ERROR (%s): Could not add a frame at index %d."
+                  ERROR_GOTO3(
-                              "\n", ErrorString(err), i, Err2);
+                      "ERROR (%s): Could not add a frame at index %d."
                      "\n",
                      ErrorString(err), i, Err2);
                }
                break;
              }
@@ -1060,13 +1070,13 @@ static int Process(const Config* config) {
          err = WebPMuxSetChunk(mux, kFourccList[config->type], &chunk, 1);
          WebPDataClear(&chunk);
          if (err != WEBP_MUX_OK) {
-            ERROR_GOTO3("ERROR (%s): Could not set the %s.\n",
+            ERROR_GOTO3("ERROR (%s): Could not set the %s.\n", ErrorString(err),
-                        ErrorString(err), kDescriptions[config->type], Err2);
+                        kDescriptions[config->type], Err2);
          }
          break;
        }
        case FEATURE_LOOP: {
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          int parse_error = 0;
          const int loop_count =
              ExUtilGetInt(config->args[0].params, 10, &parse_error);
@@ -1091,12 +1101,11 @@ static int Process(const Config* config) {
          break;
        }
        case FEATURE_BGCOLOR: {
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          uint32_t bgcolor;
          ok = ParseBgcolorArgs(config->args[0].params, &bgcolor);
          if (!ok) {
-            ERROR_GOTO1("ERROR: Could not parse the background color.\n",
+            ERROR_GOTO1("ERROR: Could not parse the background color.\n", Err2);
                        Err2);
          }
          ok = CreateMux(config->input, &mux);
          if (!ok) goto Err2;
@@ -1132,7 +1141,8 @@ static int Process(const Config* config) {
        ERROR_GOTO1("ERROR: can not parse the number of frames.\n", Err2);
      }
      if (num_frames == 0) {
-        fprintf(stderr, "Doesn't look like the source is animated. "
+        fprintf(stderr,
                "Doesn't look like the source is animated. "
                "Skipping duration setting.\n");
        ok = WriteWebP(mux, config->output);
        if (!ok) goto Err2;
@@ -1150,8 +1160,8 @@ static int Process(const Config* config) {
          int k;
          int args[3];
          int duration, start, end;
-          const int nb_args = ExUtilGetInts(config->args[i].params,
+          const int nb_args =
-                                            10, 3, args);
+              ExUtilGetInts(config->args[i].params, 10, 3, args);
          ok = (nb_args >= 1);
          if (!ok) goto Err3;
          duration = args[0];
@@ -1212,8 +1222,8 @@ static int Process(const Config* config) {
          config->type == FEATURE_XMP) {
        err = WebPMuxDeleteChunk(mux, kFourccList[config->type]);
        if (err != WEBP_MUX_OK) {
-          ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n",
+          ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n", ErrorString(err),
-                      ErrorString(err), kDescriptions[config->type], Err2);
+                      kDescriptions[config->type], Err2);
        }
      } else {
        ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
@@ -1234,7 +1244,7 @@ static int Process(const Config* config) {
    }
  }
- Err2:
+Err2:
  WebPMuxDelete(mux);
  return ok;
 }
--- a/extras/extras.h
+++ b/extras/extras.h
@@ -49,10 +49,10 @@ WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
 // MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
 // will be discarded. Returns 'false' in case of error, or if indexed[] contains
 // invalid indices.
-WEBP_EXTERN int
+WEBP_EXTERN int WebPImportColorMappedARGB(const uint8_t* indexed,
-WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
+                                          int indexed_stride,
-                          const uint32_t palette[], int palette_size,
+                                          const uint32_t palette[],
-                          WebPPicture* pic);
+                                          int palette_size, WebPPicture* pic);
 // Convert the ARGB content of 'pic' from associated to unassociated.
 // 'pic' can be for instance the result of calling of some WebPPictureImportXXX
@@ -98,10 +98,12 @@ WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
 //     currently supported.
 // width, height: width and height of the image in pixels
 // Returns 0 on failure.
-WEBP_EXTERN int SharpYuvEstimate420Risk(
+WEBP_EXTERN int SharpYuvEstimate420Risk(const void* r_ptr, const void* g_ptr,
-    const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
+                                        const void* b_ptr, int rgb_step,
-    int rgb_stride, int rgb_bit_depth, int width, int height,
+                                        int rgb_stride, int rgb_bit_depth,
-    const SharpYuvOptions* options, float* score);
+                                        int width, int height,
                                        const SharpYuvOptions* options,
                                        float* score);
 //------------------------------------------------------------------------------
--- a/extras/get_disto.c
+++ b/extras/get_disto.c
@@ -48,7 +48,7 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
  reader = WebPGuessImageReader(data, data_size);
  ok = reader(data, data_size, pic, keep_alpha, NULL);
- End:
+End:
  if (!ok) {
    WFPRINTF(stderr, "Error! Could not process file %s\n",
             (const W_CHAR*)filename);
@@ -57,8 +57,8 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
  return ok ? data_size : 0;
 }
-static void RescalePlane(uint8_t* plane, int width, int height,
+static void RescalePlane(uint8_t* plane, int width, int height, int x_stride,
-                         int x_stride, int y_stride, int max) {
+                         int y_stride, int max) {
  const uint32_t factor = (max > 0) ? (255u << 16) / max : 0;
  int x, y;
  for (y = 0; y < height; ++y) {
@@ -71,9 +71,9 @@ static void RescalePlane(uint8_t* plane, int width, int height,
 }
 // Return the max absolute difference.
-static int DiffScaleChannel(uint8_t* src1, int stride1,
+static int DiffScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
-                            const uint8_t* src2, int stride2,
+                            int stride2, int x_stride, int w, int h,
-                            int x_stride, int w, int h, int do_scaling) {
+                            int do_scaling) {
  int x, y;
  int max = 0;
  for (y = 0; y < h; ++y) {
@@ -105,7 +105,7 @@ typedef struct {
 } DistoStats;
 // hat-shaped filter. Sum of coefficients is equal to 16.
-static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = { 1, 2, 3, 4, 3, 2, 1 };
+static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = {1, 2, 3, 4, 3, 2, 1};
 static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
  const uint32_t N = stats->w;
@@ -133,9 +133,9 @@ static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
 }
 static double SSIMGetClipped(const uint8_t* src1, int stride1,
-                             const uint8_t* src2, int stride2,
+                             const uint8_t* src2, int stride2, int xo, int yo,
-                             int xo, int yo, int W, int H) {
+                             int W, int H) {
-  DistoStats stats = { 0, 0, 0, 0, 0, 0 };
+  DistoStats stats = {0, 0, 0, 0, 0, 0};
  const int ymin = (yo - SSIM_KERNEL < 0) ? 0 : yo - SSIM_KERNEL;
  const int ymax = (yo + SSIM_KERNEL > H - 1) ? H - 1 : yo + SSIM_KERNEL;
  const int xmin = (xo - SSIM_KERNEL < 0) ? 0 : xo - SSIM_KERNEL;
@@ -145,8 +145,8 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
  src2 += ymin * stride2;
  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
    for (x = xmin; x <= xmax; ++x) {
-      const uint32_t w = kWeight[SSIM_KERNEL + x - xo]
+      const uint32_t w =
-                       * kWeight[SSIM_KERNEL + y - yo];
+          kWeight[SSIM_KERNEL + x - xo] * kWeight[SSIM_KERNEL + y - yo];
      const uint32_t s1 = src1[x];
      const uint32_t s2 = src2[x];
      stats.w += w;
@@ -161,9 +161,9 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
 }
 // Compute SSIM-score map. Return -1 in case of error, max diff otherwise.
-static int SSIMScaleChannel(uint8_t* src1, int stride1,
+static int SSIMScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
-                            const uint8_t* src2, int stride2,
+                            int stride2, int x_stride, int w, int h,
-                            int x_stride, int w, int h, int do_scaling) {
+                            int do_scaling) {
  int x, y;
  int max = 0;
  uint8_t* const plane1 = (uint8_t*)malloc(2 * w * h * sizeof(*plane1));
@@ -297,8 +297,7 @@ int main(int argc, const char* argv[]) {
    fprintf(stderr, "Error while computing the distortion.\n");
    goto End;
  }
-  printf("%u %.2f    %.2f %.2f %.2f %.2f [ %.2f bpp ]\n",
+  printf("%u %.2f    %.2f %.2f %.2f %.2f [ %.2f bpp ]\n", (unsigned int)size1,
         (unsigned int)size1,
         disto[4], disto[0], disto[1], disto[2], disto[3],
         8.f * size1 / pic1.width / pic1.height);
@@ -306,7 +305,8 @@ int main(int argc, const char* argv[]) {
    uint8_t* data = NULL;
    size_t data_size = 0;
    if (pic1.use_argb != pic2.use_argb) {
-      fprintf(stderr, "Pictures are not in the same argb format. "
+      fprintf(stderr,
              "Pictures are not in the same argb format. "
              "Can't save the difference map.\n");
      goto End;
    }
@@ -314,13 +314,16 @@ int main(int argc, const char* argv[]) {
      int n;
      fprintf(stderr, "max differences per channel: ");
      for (n = 0; n < 3; ++n) {  // skip the alpha channel
-        const int range = (type == 1) ?
+        const int range =
-          SSIMScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
+            (type == 1)
-                           (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
+                ? SSIMScaleChannel(
-                           4, pic1.width, pic1.height, scale) :
+                      (uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
-          DiffScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
+                      (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
-                           (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
+                      pic1.width, pic1.height, scale)
-                           4, pic1.width, pic1.height, scale);
+                : DiffScaleChannel(
                      (uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
                      (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
                      pic1.width, pic1.height, scale);
        if (range < 0) fprintf(stderr, "\nError computing diff map\n");
        fprintf(stderr, "[%d]", range);
      }
@@ -331,10 +334,9 @@ int main(int argc, const char* argv[]) {
      goto End;
    }
 #if !defined(WEBP_REDUCE_CSP)
-    data_size = WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb,
+    data_size =
-                                       pic1.width, pic1.height,
+        WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb, pic1.width,
-                                       pic1.argb_stride * 4,
+                               pic1.height, pic1.argb_stride * 4, &data);
                                       &data);
    if (data_size == 0) {
      fprintf(stderr, "Error during lossless encoding.\n");
      goto End;
@@ -346,14 +348,15 @@ int main(int argc, const char* argv[]) {
 #else
    (void)data;
    (void)data_size;
-    fprintf(stderr, "Cannot save the difference map. Please recompile "
+    fprintf(stderr,
            "Cannot save the difference map. Please recompile "
            "without the WEBP_REDUCE_CSP flag.\n");
    goto End;
 #endif  // WEBP_REDUCE_CSP
  }
  ret = EXIT_SUCCESS;
- End:
+End:
  WebPPictureFree(&pic1);
  WebPPictureFree(&pic2);
  FREE_WARGV_AND_RETURN(ret);
--- a/extras/quality_estimate.c
+++ b/extras/quality_estimate.c
@@ -83,7 +83,7 @@ int VP8EstimateQuality(const uint8_t* const data, size_t size) {
    const int update_map = GET_BIT(1);
    if (GET_BIT(1)) {  // update data
      const int absolute_delta = GET_BIT(1);
-      int q[4]  = { 0, 0, 0, 0 };
+      int q[4] = {0, 0, 0, 0};
      for (s = 0; s < 4; ++s) {
        if (GET_BIT(1)) {
          q[s] = GET_BIT(7);
--- a/extras/vwebp_sdl.c
+++ b/extras/vwebp_sdl.c
@@ -23,10 +23,10 @@
 #if defined(WEBP_HAVE_SDL)
 #include "webp_to_sdl.h"
 #include "webp/decode.h"
 #include "imageio/imageio_util.h"
 #include "../examples/unicode.h"
 #include "imageio/imageio_util.h"
 #include "webp/decode.h"
 #include "webp_to_sdl.h"
 #if defined(WEBP_HAVE_JUST_SDL_H)
 #include <SDL.h>
@@ -41,11 +41,15 @@ static void ProcessEvents(void) {
    switch (event.type) {
      case SDL_KEYUP:
        switch (event.key.keysym.sym) {
-          case SDLK_q: done = 1; break;
+          case SDLK_q:
-          default: break;
+            done = 1;
            break;
          default:
            break;
        }
        break;
-      default: break;
+      default:
        break;
    }
  }
 }
@@ -93,7 +97,7 @@ int main(int argc, char* argv[]) {
  }
  ok = 1;
- Error:
+Error:
  SDL_Quit();
  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
--- a/extras/webp_quality.c
+++ b/extras/webp_quality.c
@@ -12,9 +12,9 @@
 #include <string.h>
 #include "../examples/unicode.h"
 #include "src/webp/types.h"
 #include "extras/extras.h"
 #include "imageio/imageio_util.h"
 #include "src/webp/types.h"
 // Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
--- a/extras/webp_to_sdl.c
+++ b/extras/webp_to_sdl.c
@@ -17,11 +17,10 @@
 #if defined(WEBP_HAVE_SDL)
 #include "webp_to_sdl.h"
 #include <stdio.h>
 #include "src/webp/decode.h"
 #include "webp_to_sdl.h"
 #if defined(WEBP_HAVE_JUST_SDL_H)
 #include <SDL.h>
@@ -67,11 +66,11 @@ int WebPToSDL(const char* data, unsigned int data_size) {
  }
 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
-  output = WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width,
+  output =
-                          &height);
+      WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width, &height);
 #else
-  output = WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width,
+  output =
-                          &height);
+      WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width, &height);
 #endif
  if (output == NULL) {
    fprintf(stderr, "Error decoding image (%d)\n", status);
@@ -84,7 +83,7 @@ int WebPToSDL(const char* data, unsigned int data_size) {
  SDL_RenderPresent(renderer);
  ok = 1;
- Error:
+Error:
  // We should call SDL_DestroyWindow(window) but that makes .js fail.
  SDL_DestroyRenderer(renderer);
  SDL_DestroyTexture(texture);
--- a/imageio/image_dec.c
+++ b/imageio/image_dec.c
@@ -9,9 +9,10 @@
 //
 // Generic image-type guessing.
 #include "./image_dec.h"
 #include <stddef.h>
 #include "./image_dec.h"
 #include "./metadata.h"
 #include "webp/encode.h"
 #include "webp/types.h"
@@ -62,8 +63,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
 }
 static int FailReader(const uint8_t* const data, size_t data_size,
-                      struct WebPPicture* const pic,
+                      struct WebPPicture* const pic, int keep_alpha,
-                      int keep_alpha, struct Metadata* const metadata) {
+                      struct Metadata* const metadata) {
  (void)data;
  (void)data_size;
  (void)pic;
@@ -74,12 +75,18 @@ static int FailReader(const uint8_t* const data, size_t data_size,
 WebPImageReader WebPGetImageReader(WebPInputFileFormat format) {
  switch (format) {
-    case WEBP_PNG_FORMAT: return ReadPNG;
+    case WEBP_PNG_FORMAT:
-    case WEBP_JPEG_FORMAT: return ReadJPEG;
+      return ReadPNG;
-    case WEBP_TIFF_FORMAT: return ReadTIFF;
+    case WEBP_JPEG_FORMAT:
-    case WEBP_WEBP_FORMAT: return ReadWebP;
+      return ReadJPEG;
-    case WEBP_PNM_FORMAT: return ReadPNM;
+    case WEBP_TIFF_FORMAT:
-    default: return FailReader;
+      return ReadTIFF;
    case WEBP_WEBP_FORMAT:
      return ReadWebP;
    case WEBP_PNM_FORMAT:
      return ReadPNM;
    default:
      return FailReader;
  }
 }
--- a/imageio/image_dec.h
+++ b/imageio/image_dec.h
@@ -22,8 +22,8 @@
 #include "webp/config.h"
 #endif
 #include "./metadata.h"
 #include "./jpegdec.h"
 #include "./metadata.h"
 #include "./pngdec.h"
 #include "./pnmdec.h"
 #include "./tiffdec.h"
@@ -53,8 +53,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
 // Signature for common image-reading functions (ReadPNG, ReadJPEG, ...)
 typedef int (*WebPImageReader)(const uint8_t* const data, size_t data_size,
-                               struct WebPPicture* const pic,
+                               struct WebPPicture* const pic, int keep_alpha,
-                               int keep_alpha, struct Metadata* const metadata);
+                               struct Metadata* const metadata);
 // Return the reader associated to a given file format.
 WebPImageReader WebPGetImageReader(WebPInputFileFormat format);
--- a/imageio/image_enc.c
+++ b/imageio/image_enc.c
@@ -26,13 +26,14 @@
 #endif
 #define CINTERFACE
 #define COBJMACROS
-#define _WIN32_IE 0x500  // Workaround bug in shlwapi.h when compiling C++
+#define _WIN32_IE \
  0x500            // Workaround bug in shlwapi.h when compiling C++
                   // code with COBJMACROS.
 #include <ole2.h>  // CreateStreamOnHGlobal()
 #include <shlwapi.h>
 #include <tchar.h>
 #include <windows.h>
 #include <wincodec.h>
 #include <windows.h>
 #endif
 #include "../examples/unicode.h"
@@ -59,8 +60,8 @@
 #define MAKE_REFGUID(x) &(x)
 #endif
-static HRESULT CreateOutputStream(const char* out_file_name,
+static HRESULT CreateOutputStream(const char* out_file_name, int write_to_mem,
-                                  int write_to_mem, IStream** stream) {
+                                  IStream** stream) {
  HRESULT hr = S_OK;
  if (write_to_mem) {
    // Output to a memory buffer. This is freed when 'stream' is released.
@@ -77,24 +78,22 @@ static HRESULT CreateOutputStream(const char* out_file_name,
 }
 static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
-                             REFGUID container_guid,
+                             REFGUID container_guid, uint8_t* rgb, int stride,
                             uint8_t* rgb, int stride,
                             uint32_t width, uint32_t height, int has_alpha) {
  HRESULT hr = S_OK;
  IWICImagingFactory* factory = NULL;
  IWICBitmapFrameEncode* frame = NULL;
  IWICBitmapEncoder* encoder = NULL;
  IStream* stream = NULL;
-  WICPixelFormatGUID pixel_format = has_alpha ? GUID_WICPixelFormat32bppBGRA
+  WICPixelFormatGUID pixel_format =
-                                              : GUID_WICPixelFormat24bppBGR;
+      has_alpha ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR;
  if (out_file_name == NULL || rgb == NULL) return E_INVALIDARG;
  IFS(CoInitialize(NULL));
-  IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
+  IFS(CoCreateInstance(
-                       CLSCTX_INPROC_SERVER,
+      MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
-                       MAKE_REFGUID(IID_IWICImagingFactory),
+      MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
                       (LPVOID*)&factory));
  if (hr == REGDB_E_CLASSNOTREG) {
    fprintf(stderr,
            "Couldn't access Windows Imaging Component (are you running "
@@ -104,14 +103,13 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
  IFS(CreateOutputStream(out_file_name, use_stdout, &stream));
  IFS(IWICImagingFactory_CreateEncoder(factory, container_guid, NULL,
                                       &encoder));
-  IFS(IWICBitmapEncoder_Initialize(encoder, stream,
+  IFS(IWICBitmapEncoder_Initialize(encoder, stream, WICBitmapEncoderNoCache));
                                   WICBitmapEncoderNoCache));
  IFS(IWICBitmapEncoder_CreateNewFrame(encoder, &frame, NULL));
  IFS(IWICBitmapFrameEncode_Initialize(frame, NULL));
  IFS(IWICBitmapFrameEncode_SetSize(frame, width, height));
  IFS(IWICBitmapFrameEncode_SetPixelFormat(frame, &pixel_format));
-  IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride,
+  IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride, height * stride,
-                                        height * stride, rgb));
+                                        rgb));
  IFS(IWICBitmapFrameEncode_Commit(frame));
  IFS(IWICBitmapEncoder_Commit(encoder));
@@ -153,8 +151,8 @@ int WebPWritePNG(const char* out_file_name, int use_stdout,
  const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
  return SUCCEEDED(WriteUsingWIC(out_file_name, use_stdout,
-                                 MAKE_REFGUID(GUID_ContainerFormatPng),
+                                 MAKE_REFGUID(GUID_ContainerFormatPng), rgb,
-                                 rgb, stride, width, height, has_alpha));
+                                 stride, width, height, has_alpha));
 }
 #elif defined(WEBP_HAVE_PNG)  // !HAVE_WINCODEC_H
@@ -169,8 +167,8 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
  if (out_file == NULL || buffer == NULL) return 0;
-  png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
+  png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, PNGErrorFunction,
-                                NULL, PNGErrorFunction, NULL);
+                                NULL);
  if (png == NULL) {
    return 0;
  }
@@ -210,7 +208,8 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
 int WebPWritePNG(FILE* fout, const WebPDecBuffer* const buffer) {
  if (fout == NULL || buffer == NULL) return 0;
-  fprintf(stderr, "PNG support not compiled. Please install the libpng "
+  fprintf(stderr,
          "PNG support not compiled. Please install the libpng "
          "development package before building.\n");
  fprintf(stderr, "You can run with -ppm flag to decode in PPM format.\n");
  return 0;
@@ -235,8 +234,10 @@ static int WritePPMPAM(FILE* fout, const WebPDecBuffer* const buffer,
    if (row == NULL) return 0;
    if (alpha) {
-      fprintf(fout, "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
+      fprintf(fout,
-                    "TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
+              "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
              "TUPLTYPE RGB_ALPHA\nENDHDR\n",
              width, height);
    } else {
      fprintf(fout, "P6\n%u %u\n255\n", width, height);
    }
@@ -310,7 +311,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  int stride;
  uint32_t y;
  uint32_t bytes_per_px, line_size, image_size, bmp_stride, total_size;
-  uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = { 0 };
+  uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = {0};
  if (fout == NULL || buffer == NULL) return 0;
@@ -367,7 +368,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
    }
    // write padding zeroes
    if (bmp_stride != line_size) {
-      const uint8_t zeroes[3] = { 0 };
+      const uint8_t zeroes[3] = {0};
      if (fwrite(zeroes, bmp_stride - line_size, 1, fout) != 1) {
        return 0;
      }
@@ -404,24 +405,24 @@ int WebPWriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
      0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,  //  10: Width  (TBD)
      0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,  //  22: Height (TBD)
      0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0,   //  34: BitsPerSample: 8888
-        EXTRA_DATA_OFFSET + 0, 0, 0, 0,
+      EXTRA_DATA_OFFSET + 0, 0, 0, 0, 0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
-    0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    //  46: Compression: none
+      0,                                         //  46: Compression: none
      0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,  //  58: Photometric: RGB
      0x11, 0x01, 4, 0, 1, 0, 0, 0,              //  70: Strips offset:
      TIFF_HEADER_SIZE, 0, 0, 0,                 //      data follows header
      0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,  //  82: Orientation: topleft
      0x15, 0x01, 3, 0, 1, 0, 0, 0,              //  94: SamplesPerPixels
-        bytes_per_px, 0, 0, 0,
+      bytes_per_px, 0, 0, 0, 0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0,
-    0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,    // 106: Rows per strip (TBD)
+      0,                                         // 106: Rows per strip (TBD)
      0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0,  // 118: StripByteCount (TBD)
      0x1a, 0x01, 5, 0, 1, 0, 0, 0,              // 130: X-resolution
-        EXTRA_DATA_OFFSET + 8, 0, 0, 0,
+      EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1b, 0x01, 5, 0, 1, 0, 0,
-    0x1b, 0x01, 5, 0, 1, 0, 0, 0,                // 142: Y-resolution
+      0,  // 142: Y-resolution
-        EXTRA_DATA_OFFSET + 8, 0, 0, 0,
+      EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
-    0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    // 154: PlanarConfiguration
+      0,                                         // 154: PlanarConfiguration
      0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,  // 166: ResolutionUnit (inch)
-    0x52, 0x01, 3, 0, 1, 0, 0, 0,
+      0x52, 0x01, 3, 0, 1, 0, 0, 0, assoc_alpha, 0, 0,
-        assoc_alpha, 0, 0, 0,                    // 178: ExtraSamples: rgbA/RGBA
+      0,           // 178: ExtraSamples: rgbA/RGBA
      0, 0, 0, 0,  // 190: IFD terminator
      // EXTRA_DATA_OFFSET:
      8, 0, 8, 0, 8, 0, 8, 0,  // BitsPerSample
@@ -517,8 +518,8 @@ int WebPWritePGM(FILE* fout, const WebPDecBuffer* const buffer) {
    if (src_y == NULL || src_u == NULL || src_v == NULL) return 0;
-    fprintf(fout, "P5\n%d %d\n255\n",
+    fprintf(fout, "P5\n%d %d\n255\n", (width + 1) & ~1,
-            (width + 1) & ~1, height + uv_height + a_height);
+            height + uv_height + a_height);
    for (y = 0; ok && y < height; ++y) {
      ok &= (fwrite(src_y, width, 1, fout) == 1);
      if (width & 1) fputc(0, fout);  // padding byte
@@ -609,8 +610,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
    }
  }
-  if (format == PNG ||
+  if (format == PNG || format == RGBA || format == BGRA || format == ARGB ||
      format == RGBA || format == BGRA || format == ARGB ||
      format == rgbA || format == bgrA || format == Argb) {
 #ifdef HAVE_WINCODEC_H
    ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
--- a/imageio/image_enc.h
+++ b/imageio/image_enc.h
@@ -20,8 +20,8 @@
 #include "webp/config.h"
 #endif
 #include "webp/types.h"
 #include "webp/decode.h"
 #include "webp/types.h"
 #ifdef __cplusplus
 extern "C" {
@@ -38,10 +38,19 @@ typedef enum {
  RAW_YUV,
  ALPHA_PLANE_ONLY,  // this is for experimenting only
  // forced colorspace output (for testing, mostly)
-  RGB, RGBA, BGR, BGRA, ARGB,
+  RGB,
-  RGBA_4444, RGB_565,
+  RGBA,
-  rgbA, bgrA, Argb, rgbA_4444,
+  BGR,
-  YUV, YUVA
+  BGRA,
  ARGB,
  RGBA_4444,
  RGB_565,
  rgbA,
  bgrA,
  Argb,
  rgbA_4444,
  YUV,
  YUVA
 } WebPOutputFileFormat;
 // General all-purpose call.
--- a/imageio/imageio_util.c
+++ b/imageio/imageio_util.c
@@ -20,8 +20,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include "webp/types.h"
 #include "../examples/unicode.h"
 #include "webp/types.h"
 // -----------------------------------------------------------------------------
 // File I/O
@@ -65,14 +65,14 @@ int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size) {
  *data_size = size;
  return 1;
- Error:
+Error:
  free(input);
  fprintf(stderr, "Could not read from stdin\n");
  return 0;
 }
-int ImgIoUtilReadFile(const char* const file_name,
+int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
-                      const uint8_t** data, size_t* data_size) {
+                      size_t* data_size) {
  int ok;
  uint8_t* file_data;
  size_t file_size;
@@ -123,8 +123,8 @@ int ImgIoUtilReadFile(const char* const file_name,
 // -----------------------------------------------------------------------------
-int ImgIoUtilWriteFile(const char* const file_name,
+int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
-                       const uint8_t* data, size_t data_size) {
+                       size_t data_size) {
  int ok;
  FILE* out;
  const int to_stdout = (file_name == NULL) || !WSTRCMP(file_name, "-");
@@ -145,8 +145,8 @@ int ImgIoUtilWriteFile(const char* const file_name,
 // -----------------------------------------------------------------------------
-void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
+void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
-                        uint8_t* dst, int dst_stride, int width, int height) {
+                        int dst_stride, int width, int height) {
  while (height-- > 0) {
    memcpy(dst, src, width * sizeof(*dst));
    src += src_stride;
--- a/imageio/imageio_util.h
+++ b/imageio/imageio_util.h
@@ -36,22 +36,22 @@ FILE* ImgIoUtilSetBinaryMode(FILE* file);
 // to be used as a C-string.
 // If 'file_name' is NULL or equal to "-", input is read from stdin by calling
 // the function ImgIoUtilReadFromStdin().
-int ImgIoUtilReadFile(const char* const file_name,
+int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
-                      const uint8_t** data, size_t* data_size);
+                      size_t* data_size);
 // Same as ImgIoUtilReadFile(), but reads until EOF from stdin instead.
 int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size);
 // Write a data segment into a file named 'file_name'. Returns true if ok.
 // If 'file_name' is NULL or equal to "-", output is written to stdout.
-int ImgIoUtilWriteFile(const char* const file_name,
+int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
-                       const uint8_t* data, size_t data_size);
+                       size_t data_size);
 //------------------------------------------------------------------------------
 // Copy width x height pixels from 'src' to 'dst' honoring the strides.
-void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
+void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
-                        uint8_t* dst, int dst_stride, int width, int height);
+                        int dst_stride, int width, int height);
 //------------------------------------------------------------------------------
--- a/imageio/jpegdec.c
+++ b/imageio/jpegdec.c
@@ -18,8 +18,8 @@
 #include <stdio.h>
 #ifdef WEBP_HAVE_JPEG
 #include <jpeglib.h>
 #include <jerror.h>
 #include <jpeglib.h>
 #include <setjmp.h>
 #include <stdlib.h>
 #include <string.h>
@@ -33,10 +33,10 @@
 // Metadata processing
 #ifndef JPEG_APP1
-# define JPEG_APP1 (JPEG_APP0 + 1)
+#define JPEG_APP1 (JPEG_APP0 + 1)
 #endif
 #ifndef JPEG_APP2
-# define JPEG_APP2 (JPEG_APP0 + 2)
+#define JPEG_APP2 (JPEG_APP0 + 2)
 #endif
 typedef struct {
@@ -74,8 +74,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
  memset(iccp_segments, 0, sizeof(iccp_segments));
  for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
-    if (marker->marker == JPEG_APP2 &&
+    if (marker->marker == JPEG_APP2 && marker->data_length > kICCPSkipLength &&
        marker->data_length > kICCPSkipLength &&
        !memcmp(marker->data, kICCPSignature, kICCPSignatureLength)) {
      // ICC_PROFILE\0<seq><count>; 'seq' starts at 1.
      const int seq = marker->data[kICCPSignatureLength];
@@ -84,7 +83,8 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
      ICCPSegment* segment;
      if (segment_size == 0 || count == 0 || seq == 0) {
-        fprintf(stderr, "[ICCP] size (%d) / count (%d) / sequence number (%d)"
+        fprintf(stderr,
                "[ICCP] size (%d) / count (%d) / sequence number (%d)"
                " cannot be 0!\n",
                (int)segment_size, seq, count);
        return 0;
@@ -100,7 +100,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
      segment = iccp_segments + seq - 1;
      if (segment->data_length != 0) {
-        fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n" , seq);
+        fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n", seq);
        return 0;
      }
@@ -138,8 +138,8 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
    int i;
    size_t offset = 0;
    for (i = 0; i < seq_max; ++i) {
-      memcpy(iccp->bytes + offset,
+      memcpy(iccp->bytes + offset, iccp_segments[i].data,
-             iccp_segments[i].data, iccp_segments[i].data_length);
+             iccp_segments[i].data_length);
      offset += iccp_segments[i].data_length;
    }
  }
@@ -157,11 +157,11 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
    size_t storage_offset;
  } kJPEGMetadataMap[] = {
      // Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
-    { JPEG_APP1, "Exif\0",                        6, METADATA_OFFSET(exif) },
+      {JPEG_APP1, "Exif\0", 6, METADATA_OFFSET(exif)},
      // XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
      // TODO(jzern) Add support for 'ExtendedXMP'
-    { JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp) },
+      {JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp)},
-    { 0, NULL, 0, 0 },
+      {0, NULL, 0, 0},
  };
  jpeg_saved_marker_ptr marker;
  // Treat ICC profiles separately as they may be segmented and out of order.
@@ -179,8 +179,8 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
                               kJPEGMetadataMap[i].storage_offset);
        if (payload->bytes == NULL) {
-          const char* marker_data = (const char*)marker->data +
+          const char* marker_data =
-                                    kJPEGMetadataMap[i].signature_length;
+              (const char*)marker->data + kJPEGMetadataMap[i].signature_length;
          const size_t marker_data_length =
              marker->data_length - kJPEGMetadataMap[i].signature_length;
          if (!MetadataCopy(marker_data, marker_data_length, payload)) return 0;
@@ -250,9 +250,7 @@ static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
  ctx->pub.next_input_byte += jump;
 }
-static void ContextTerm(j_decompress_ptr cinfo) {
+static void ContextTerm(j_decompress_ptr cinfo) { (void)cinfo; }
  (void)cinfo;
 }
 static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
                         JPEGReadContext* const ctx) {
@@ -267,8 +265,7 @@ static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
 }
 int ReadJPEG(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic, int keep_alpha,
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
             Metadata* const metadata) {
  volatile int ok = 0;
  int width, height;
  int64_t stride;
@@ -353,7 +350,7 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
    MetadataFree(metadata);  // In case the caller forgets to free it on error.
  }
- End:
+End:
  free(rgb);
  return ok;
 }
@@ -366,7 +363,8 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "JPEG support not compiled. Please install the libjpeg "
+  fprintf(stderr,
          "JPEG support not compiled. Please install the libjpeg "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/pngdec.c
+++ b/imageio/pngdec.c
@@ -22,7 +22,6 @@
 #define PNG_USER_MEM_SUPPORTED  // for png_create_read_struct_2
 #endif
 #include <png.h>
 #include <setjmp.h>  // note: this must be included *after* png.h
 #include <stdlib.h>
 #include <string.h>
@@ -33,15 +32,14 @@
 #include "webp/types.h"
 #define LOCAL_PNG_VERSION ((PNG_LIBPNG_VER_MAJOR << 8) | PNG_LIBPNG_VER_MINOR)
-#define LOCAL_PNG_PREREQ(maj, min) \
+#define LOCAL_PNG_PREREQ(maj, min) (LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))
   (LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))
 static void PNGAPI error_function(png_structp png, png_const_charp error) {
  if (error != NULL) fprintf(stderr, "libpng error: %s\n", error);
  longjmp(png_jmpbuf(png), 1);
 }
-#if LOCAL_PNG_PREREQ(1,4)
+#if LOCAL_PNG_PREREQ(1, 4)
 typedef png_alloc_size_t LocalPngAllocSize;
 #else
 typedef png_size_t LocalPngAllocSize;
@@ -113,7 +111,8 @@ static int ProcessRawProfile(const char* profile, size_t profile_len,
  }
  ++src;
  // skip the profile name and extract the length.
-  while (*src != '\0' && *src++ != '\n') {}
+  while (*src != '\0' && *src++ != '\n') {
  }
  expected_length = (int)strtol(src, &end, 10);
  if (*end != '\n') {
    fprintf(stderr, "Malformed raw profile, expected '\\n' got '\\x%.2X'\n",
@@ -137,23 +136,22 @@ static const struct {
 } kPNGMetadataMap[] = {
    // https://exiftool.org/TagNames/PNG.html#TextualData
    // See also: ExifTool on CPAN.
-  { "Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif) },
+    {"Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif)},
-  { "Raw profile type xmp",  ProcessRawProfile, METADATA_OFFSET(xmp) },
+    {"Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp)},
    // Exiftool puts exif data in APP1 chunk, too.
-  { "Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif) },
+    {"Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif)},
    // ImageMagick uses lowercase app1.
-  { "Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif) },
+    {"Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif)},
    // XMP Specification Part 3, Section 3 #PNG
-  { "XML:com.adobe.xmp",     MetadataCopy,      METADATA_OFFSET(xmp) },
+    {"XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp)},
-  { NULL, NULL, 0 },
+    {NULL, NULL, 0},
 };
 // Looks for metadata at both the beginning and end of the PNG file, giving
 // preference to the head.
 // Returns true on success. The caller must use MetadataFree() on 'metadata' in
 // all cases.
-static int ExtractMetadataFromPNG(png_structp png,
+static int ExtractMetadataFromPNG(png_structp png, png_infop const head_info,
                                  png_infop const head_info,
                                  png_infop const end_info,
                                  Metadata* const metadata) {
  int p;
@@ -215,15 +213,15 @@ static int ExtractMetadataFromPNG(png_structp png,
    {
      png_charp name;
      int comp_type;
-#if LOCAL_PNG_PREREQ(1,5)
+#if LOCAL_PNG_PREREQ(1, 5)
      png_bytep profile;
 #else
      png_charp profile;
 #endif
      png_uint_32 len;
-      if (png_get_iCCP(png, info,
+      if (png_get_iCCP(png, info, &name, &comp_type, &profile, &len) ==
-                       &name, &comp_type, &profile, &len) == PNG_INFO_iCCP) {
+          PNG_INFO_iCCP) {
        if (!MetadataCopy((const char*)profile, len, &metadata->iccp)) return 0;
      }
    }
@@ -248,12 +246,12 @@ static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {
 }
 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
+            struct WebPPicture* const pic, int keep_alpha,
-            int keep_alpha, struct Metadata* const metadata) {
+            struct Metadata* const metadata) {
  volatile png_structp png = NULL;
  volatile png_infop info = NULL;
  volatile png_infop end_info = NULL;
-  PNGReadContext context = { NULL, 0, 0 };
+  PNGReadContext context = {NULL, 0, 0};
  int color_type, bit_depth, interlaced;
  int num_channels;
  int num_passes;
@@ -268,8 +266,8 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
  context.data = data;
  context.data_size = data_size;
-  png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL,
+  png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL, NULL,
-                                 NULL, MallocFunc, FreeFunc);
+                                 MallocFunc, FreeFunc);
  if (png == NULL) goto End;
  png_set_error_fn(png, 0, error_function, NULL);
@@ -279,8 +277,8 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
    goto End;
  }
-#if LOCAL_PNG_PREREQ(1,5) || \
+#if LOCAL_PNG_PREREQ(1, 5) || \
-    (LOCAL_PNG_PREREQ(1,4) && PNG_LIBPNG_VER_RELEASE >= 1)
+    (LOCAL_PNG_PREREQ(1, 4) && PNG_LIBPNG_VER_RELEASE >= 1)
  // If it looks like the bitstream is going to need more memory than libpng's
  // internal limit (default: 8M), try to (reasonably) raise it.
  if (data_size > png_get_chunk_malloc_max(png) && data_size < (1u << 24)) {
@@ -295,9 +293,9 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
  png_set_read_fn(png, &context, ReadFunc);
  png_read_info(png, info);
-  if (!png_get_IHDR(png, info,
+  if (!png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type,
-                    &width, &height, &bit_depth, &color_type, &interlaced,
+                    &interlaced, NULL, NULL))
-                    NULL, NULL)) goto Error;
+    goto Error;
  png_set_strip_16(png);
  png_set_packing(png);
@@ -368,24 +366,25 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
    goto Error;
  }
- End:
+End:
  if (png != NULL) {
-    png_destroy_read_struct((png_structpp)&png,
+    png_destroy_read_struct((png_structpp)&png, (png_infopp)&info,
-                            (png_infopp)&info, (png_infopp)&end_info);
+                            (png_infopp)&end_info);
  }
  free(rgb);
  return ok;
 }
 #else   // !WEBP_HAVE_PNG
 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
+            struct WebPPicture* const pic, int keep_alpha,
-            int keep_alpha, struct Metadata* const metadata) {
+            struct Metadata* const metadata) {
  (void)data;
  (void)data_size;
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "PNG support not compiled. Please install the libpng "
+  fprintf(stderr,
          "PNG support not compiled. Please install the libpng "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/pngdec.h
+++ b/imageio/pngdec.h
@@ -29,8 +29,8 @@ struct WebPPicture;
 // or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
 // Returns true on success.
 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
+            struct WebPPicture* const pic, int keep_alpha,
-            int keep_alpha, struct Metadata* const metadata);
+            struct Metadata* const metadata);
 #ifdef __cplusplus
 }  // extern "C"
--- a/imageio/pnmdec.c
+++ b/imageio/pnmdec.c
@@ -55,7 +55,7 @@ static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
                       char out[MAX_LINE_SIZE + 1], size_t* const out_size) {
  size_t i = 0;
  *out_size = 0;
- redo:
+redo:
  for (i = 0; i < MAX_LINE_SIZE && off < data_size; ++i) {
    out[i] = data[off++];
    if (out[i] == '\n') break;
@@ -173,9 +173,8 @@ static size_t ReadHeader(PNMInfo* const info) {
    info->depth = (info->type == 5) ? 1 : 3;
  }
  // perform some basic numerical validation
-  if (info->width <= 0 || info->height <= 0 ||
+  if (info->width <= 0 || info->height <= 0 || info->type <= 0 ||
-      info->type <= 0 || info->type >= 9 ||
+      info->type >= 9 || info->depth <= 0 || info->depth > 4 ||
      info->depth <= 0 || info->depth > 4 ||
      info->max_value <= 0 || info->max_value >= 65536) {
    return 0;
  }
@@ -183,13 +182,12 @@ static size_t ReadHeader(PNMInfo* const info) {
  return off;
 }
-int ReadPNM(const uint8_t* const data, size_t data_size,
+int ReadPNM(const uint8_t* const data, size_t data_size, WebPPicture* const pic,
-            WebPPicture* const pic, int keep_alpha,
+            int keep_alpha, struct Metadata* const metadata) {
            struct Metadata* const metadata) {
  int ok = 0;
  int i, j;
  uint64_t stride, pixel_bytes, sample_size, depth;
-  uint8_t* rgb = NULL, *tmp_rgb;
+  uint8_t *rgb = NULL, *tmp_rgb;
  size_t offset;
  PNMInfo info;
@@ -209,8 +207,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
  // Some basic validations.
  if (pic == NULL) goto End;
  if (info.width > WEBP_MAX_DIMENSION || info.height > WEBP_MAX_DIMENSION) {
-    fprintf(stderr, "Invalid %dx%d dimension for PNM\n",
+    fprintf(stderr, "Invalid %dx%d dimension for PNM\n", info.width,
-                    info.width, info.height);
+            info.height);
    goto End;
  }
@@ -258,8 +256,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
      const uint32_t round = info.max_value / 2;
      int k = 0;
      for (i = 0; i < info.width * info.depth; ++i) {
-        uint32_t v = (sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1]
+        uint32_t v =
-                   : in[i];
+            (sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1] : in[i];
        if (info.max_value != 255) v = (v * 255u + round) / info.max_value;
        if (v > 255u) v = 255u;
        if (info.depth > 2) {
@@ -291,7 +289,7 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
  if (!ok) goto End;
  ok = 1;
- End:
+End:
  free((void*)rgb);
  (void)metadata;
--- a/imageio/tiffdec.c
+++ b/imageio/tiffdec.c
@@ -31,9 +31,9 @@ static const struct {
  ttag_t tag;
  size_t storage_offset;
 } kTIFFMetadataMap[] = {
-  { TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp) },
+    {TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp)},
-  { TIFFTAG_XMLPACKET,  METADATA_OFFSET(xmp) },
+    {TIFFTAG_XMLPACKET, METADATA_OFFSET(xmp)},
-  { 0, 0 },
+    {0, 0},
 };
 // Returns true on success. The caller must use MetadataFree() on 'metadata' in
@@ -128,9 +128,7 @@ static uint32_t Unmult(uint8_t x, uint32_t mult) {
  return (v > 255u) ? 255u : v;
 }
-static WEBP_INLINE uint32_t GetScale(uint32_t a) {
+static WEBP_INLINE uint32_t GetScale(uint32_t a) { return (255u << MFIX) / a; }
  return (255u << MFIX) / a;
 }
 #undef MFIX
 #undef HALF
@@ -153,9 +151,8 @@ static void MultARGBRow(uint8_t* ptr, int width) {
 }
 int ReadTIFF(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic, int keep_alpha,
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
-             Metadata* const metadata) {
+  MyData my_data = {data, (toff_t)data_size, 0};
  MyData my_data = { data, (toff_t)data_size, 0 };
  TIFF* tif;
  uint32_t image_width, image_height, tile_width, tile_height;
  uint64_t stride;
@@ -171,8 +168,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
    return 0;
  }
-  tif = TIFFClientOpen("Memory", "r", &my_data,
+  tif = TIFFClientOpen("Memory", "r", &my_data, MyRead, MyRead, MySeek, MyClose,
                       MyRead, MyRead, MySeek, MyClose,
                       MySize, MyMapFile, MyUnmapFile);
  if (tif == NULL) {
    fprintf(stderr, "Error! Cannot parse TIFF file\n");
@@ -181,7 +177,8 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
  dircount = TIFFNumberOfDirectories(tif);
  if (dircount > 1) {
-    fprintf(stderr, "Warning: multi-directory TIFF files are not supported.\n"
+    fprintf(stderr,
            "Warning: multi-directory TIFF files are not supported.\n"
            "Only the first will be used, %d will be ignored.\n",
            dircount - 1);
  }
@@ -253,7 +250,8 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
          tmp += stride;
        }
      }
-      ok = keep_alpha
+      ok =
          keep_alpha
              ? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
              : WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
    }
@@ -272,7 +270,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
      }
    }
  }
- End:
+End:
  TIFFClose(tif);
  return ok;
 }
@@ -285,7 +283,8 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "TIFF support not compiled. Please install the libtiff "
+  fprintf(stderr,
          "TIFF support not compiled. Please install the libtiff "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/webpdec.c
+++ b/imageio/webpdec.c
@@ -31,9 +31,14 @@
 // WebP decoding
 static const char* const kStatusMessages[VP8_STATUS_NOT_ENOUGH_DATA + 1] = {
-  "OK", "OUT_OF_MEMORY", "INVALID_PARAM", "BITSTREAM_ERROR",
+    "OK",
-  "UNSUPPORTED_FEATURE", "SUSPENDED", "USER_ABORT", "NOT_ENOUGH_DATA"
+    "OUT_OF_MEMORY",
-};
+    "INVALID_PARAM",
    "BITSTREAM_ERROR",
    "UNSUPPORTED_FEATURE",
    "SUSPENDED",
    "USER_ABORT",
    "NOT_ENOUGH_DATA"};
 static void PrintAnimationWarning(const WebPDecoderConfig* const config) {
  if (config->input.has_animation) {
@@ -53,8 +58,7 @@ void PrintWebPError(const char* const in_file, int status) {
  fprintf(stderr, "\n");
 }
-int LoadWebP(const char* const in_file,
+int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
             const uint8_t** data, size_t* data_size,
             WebPBitstreamFeatures* bitstream) {
  VP8StatusCode status;
  WebPBitstreamFeatures local_features;
@@ -84,8 +88,7 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
  return WebPDecode(data, data_size, config);
 }
-VP8StatusCode DecodeWebPIncremental(
+VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
    const uint8_t* const data, size_t data_size,
                                    WebPDecoderConfig* const config) {
  VP8StatusCode status = VP8_STATUS_OK;
  if (config == NULL) return VP8_STATUS_INVALID_PARAM;
@@ -111,7 +114,7 @@ VP8StatusCode DecodeWebPIncremental(
 static int ExtractMetadata(const uint8_t* const data, size_t data_size,
                           Metadata* const metadata) {
-  WebPData webp_data = { data, data_size };
+  WebPData webp_data = {data, data_size};
  WebPDemuxer* const demux = WebPDemux(&webp_data);
  WebPChunkIterator chunk_iter;
  uint32_t flags;
@@ -143,8 +146,7 @@ static int ExtractMetadata(const uint8_t* const data, size_t data_size,
 // -----------------------------------------------------------------------------
 int ReadWebP(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic,
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
             int keep_alpha, Metadata* const metadata) {
  int ok = 0;
  VP8StatusCode status = VP8_STATUS_OK;
  WebPDecoderConfig config;
--- a/imageio/webpdec.h
+++ b/imageio/webpdec.h
@@ -35,8 +35,7 @@ void PrintWebPError(const char* const in_file, int status);
 // Reads a WebP from 'in_file', returning the contents and size in 'data' and
 // 'data_size'. If not NULL, 'bitstream' is populated using WebPGetFeatures().
 // Returns true on success.
-int LoadWebP(const char* const in_file,
+int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
             const uint8_t** data, size_t* data_size,
             WebPBitstreamFeatures* bitstream);
 // Decodes the WebP contained in 'data'.
@@ -48,8 +47,7 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
                         WebPDecoderConfig* const config);
 // Same as DecodeWebP(), but using the incremental decoder.
-VP8StatusCode DecodeWebPIncremental(
+VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
    const uint8_t* const data, size_t data_size,
                                    WebPDecoderConfig* const config);
 //------------------------------------------------------------------------------
@@ -60,8 +58,8 @@ VP8StatusCode DecodeWebPIncremental(
 // or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
 // Returns true on success.
 int ReadWebP(const uint8_t* const data, size_t data_size,
-             struct WebPPicture* const pic,
+             struct WebPPicture* const pic, int keep_alpha,
-             int keep_alpha, struct Metadata* const metadata);
+             struct Metadata* const metadata);
 #ifdef __cplusplus
 }  // extern "C"
--- a/imageio/wicdec.c
+++ b/imageio/wicdec.c
@@ -25,13 +25,14 @@
 #endif
 #define CINTERFACE
 #define COBJMACROS
-#define _WIN32_IE 0x500  // Workaround bug in shlwapi.h when compiling C++
+#define _WIN32_IE \
  0x500            // Workaround bug in shlwapi.h when compiling C++
                   // code with COBJMACROS.
 #include <ole2.h>  // CreateStreamOnHGlobal()
 #include <shlwapi.h>
 #include <tchar.h>
 #include <windows.h>
 #include <wincodec.h>
 #include <windows.h>
 #include "../examples/unicode.h"
 #include "./imageio_util.h"
@@ -48,8 +49,7 @@
 // modified version of DEFINE_GUID from guiddef.h.
 #define WEBP_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
-  static const GUID name = \
+  static const GUID name = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
      { l, w1, w2, { b1, b2,  b3,  b4,  b5,  b6,  b7,  b8 } }
 #ifdef __cplusplus
 #define MAKE_REFGUID(x) (x)
@@ -66,23 +66,17 @@ typedef struct WICFormatImporter {
 // From Microsoft SDK 7.0a -- wincodec.h
 // Create local copies for compatibility when building against earlier
 // versions of the SDK.
-WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
-                 0x6fddc324, 0x4e03, 0x4bfe,
+                 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
-                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
+WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
-WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_,
+                 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
-                 0x6fddc324, 0x4e03, 0x4bfe,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_, 0x6fddc324, 0x4e03, 0x4bfe,
                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
 WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
                 0x6fddc324, 0x4e03, 0x4bfe,
                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0f);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_, 0xf5c7ad2d, 0x6a8d, 0x43dd,
                 0xf5c7ad2d, 0x6a8d, 0x43dd,
                 0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_, 0x1562ff7c, 0xd352, 0x46f9,
                 0x1562ff7c, 0xd352, 0x46f9,
                 0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_, 0x6fddc324, 0x4e03, 0x4bfe,
                 0x6fddc324, 0x4e03, 0x4bfe,
                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
 static HRESULT OpenInputStream(const char* filename, IStream** stream) {
@@ -147,8 +141,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
  if (SUCCEEDED(hr)) {
    UINT num_color_contexts;
-    IFS(IWICBitmapFrameDecode_GetColorContexts(frame,
+    IFS(IWICBitmapFrameDecode_GetColorContexts(frame, count, color_contexts,
                                               count, color_contexts,
                                               &num_color_contexts));
    assert(FAILED(hr) || num_color_contexts <= count);
    for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
@@ -156,8 +149,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
      IFS(IWICColorContext_GetType(color_contexts[i], &type));
      if (SUCCEEDED(hr) && type == WICColorContextProfile) {
        UINT size;
-        IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
+        IFS(IWICColorContext_GetProfileBytes(color_contexts[i], 0, NULL,
-                                             0, NULL, &size));
+                                             &size));
        if (SUCCEEDED(hr) && size > 0) {
          iccp->bytes = (uint8_t*)malloc(size);
          if (iccp->bytes == NULL) {
@@ -165,9 +158,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
            break;
          }
          iccp->size = size;
-          IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
+          IFS(IWICColorContext_GetProfileBytes(
-                                               (UINT)iccp->size, iccp->bytes,
+              color_contexts[i], (UINT)iccp->size, iccp->bytes, &size));
                                               &size));
          if (SUCCEEDED(hr) && size != iccp->size) {
            fprintf(stderr, "Warning! ICC profile size (%u) != expected (%u)\n",
                    size, (uint32_t)iccp->size);
@@ -209,8 +201,7 @@ static int HasPalette(GUID pixel_format) {
 static int HasAlpha(IWICImagingFactory* const factory,
                    IWICBitmapDecoder* const decoder,
-                    IWICBitmapFrameDecode* const frame,
+                    IWICBitmapFrameDecode* const frame, GUID pixel_format) {
                    GUID pixel_format) {
  int has_alpha;
  if (HasPalette(pixel_format)) {
    IWICPalette* frame_palette = NULL;
@@ -245,21 +236,20 @@ static int HasAlpha(IWICImagingFactory* const factory,
  return has_alpha;
 }
-int ReadPictureWithWIC(const char* const filename,
+int ReadPictureWithWIC(const char* const filename, WebPPicture* const pic,
-                       WebPPicture* const pic, int keep_alpha,
+                       int keep_alpha, Metadata* const metadata) {
                       Metadata* const metadata) {
  // From Microsoft SDK 6.0a -- ks.h
  // Define a local copy to avoid link errors under mingw.
  WEBP_DEFINE_GUID(GUID_NULL_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  static const WICFormatImporter kAlphaFormatImporters[] = {
-    { &GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA },
+      {&GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA},
-    { &GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA },
+      {&GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA},
-    { NULL, 0, NULL },
+      {NULL, 0, NULL},
  };
  static const WICFormatImporter kNonAlphaFormatImporters[] = {
-    { &GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR },
+      {&GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR},
-    { &GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB },
+      {&GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB},
-    { NULL, 0, NULL },
+      {NULL, 0, NULL},
  };
  HRESULT hr = S_OK;
  IWICBitmapFrameDecode* frame = NULL;
@@ -274,26 +264,20 @@ int ReadPictureWithWIC(const char* const filename,
  const WICFormatImporter* importer = NULL;
  GUID src_container_format = GUID_NULL_;
  // From Windows Kits\10\Include\10.0.19041.0\um\wincodec.h
-  WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_,
+  WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_, 0xe094b0e2, 0x67f2, 0x45b3, 0xb0,
-                   0xe094b0e2, 0x67f2, 0x45b3,
+                   0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
                   0xb0, 0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
  static const GUID* kAlphaContainers[] = {
-    &GUID_ContainerFormatBmp,
+      &GUID_ContainerFormatBmp, &GUID_ContainerFormatPng,
-    &GUID_ContainerFormatPng,
+      &GUID_ContainerFormatTiff, &GUID_ContainerFormatWebp_, NULL};
    &GUID_ContainerFormatTiff,
    &GUID_ContainerFormatWebp_,
    NULL
  };
  int has_alpha = 0;
  int64_t stride;
  if (filename == NULL || pic == NULL) return 0;
  IFS(CoInitialize(NULL));
-  IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
+  IFS(CoCreateInstance(
-                       CLSCTX_INPROC_SERVER,
+      MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
-                       MAKE_REFGUID(IID_IWICImagingFactory),
+      MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
                       (LPVOID*)&factory));
  if (hr == REGDB_E_CLASSNOTREG) {
    fprintf(stderr,
            "Couldn't access Windows Imaging Component (are you running "
@@ -303,8 +287,7 @@ int ReadPictureWithWIC(const char* const filename,
  // Prepare for image decoding.
  IFS(OpenInputStream(filename, &stream));
  IFS(IWICImagingFactory_CreateDecoderFromStream(
-          factory, stream, NULL,
+      factory, stream, NULL, WICDecodeMetadataCacheOnDemand, &decoder));
          WICDecodeMetadataCacheOnDemand, &decoder));
  IFS(IWICBitmapDecoder_GetFrameCount(decoder, &frame_count));
  if (SUCCEEDED(hr)) {
    if (frame_count == 0) {
@@ -338,18 +321,15 @@ int ReadPictureWithWIC(const char* const filename,
       hr == S_OK && importer->import != NULL; ++importer) {
    BOOL can_convert;
    const HRESULT cchr = IWICFormatConverter_CanConvert(
-        converter,
+        converter, MAKE_REFGUID(src_pixel_format),
-        MAKE_REFGUID(src_pixel_format),
+        MAKE_REFGUID(*importer->pixel_format), &can_convert);
        MAKE_REFGUID(*importer->pixel_format),
        &can_convert);
    if (SUCCEEDED(cchr) && can_convert) break;
  }
  if (importer->import == NULL) hr = E_FAIL;
-  IFS(IWICFormatConverter_Initialize(converter, (IWICBitmapSource*)frame,
+  IFS(IWICFormatConverter_Initialize(
-                                     importer->pixel_format,
+      converter, (IWICBitmapSource*)frame, importer->pixel_format,
-                                     WICBitmapDitherTypeNone,
+      WICBitmapDitherTypeNone, NULL, 0.0, WICBitmapPaletteTypeCustom));
                                     NULL, 0.0, WICBitmapPaletteTypeCustom));
  // Decode.
  IFS(IWICFormatConverter_GetSize(converter, &width, &height));
@@ -361,11 +341,10 @@ int ReadPictureWithWIC(const char* const filename,
  if (SUCCEEDED(hr)) {
    rgb = (BYTE*)malloc((size_t)stride * height);
-    if (rgb == NULL)
+    if (rgb == NULL) hr = E_OUTOFMEMORY;
      hr = E_OUTOFMEMORY;
  }
-  IFS(IWICFormatConverter_CopyPixels(converter, NULL,
+  IFS(IWICFormatConverter_CopyPixels(converter, NULL, (UINT)stride,
-                                     (UINT)stride, (UINT)stride * height, rgb));
+                                     (UINT)stride * height, rgb));
  // WebP conversion.
  if (SUCCEEDED(hr)) {
@@ -402,7 +381,8 @@ int ReadPictureWithWIC(const char* const filename,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "Windows Imaging Component (WIC) support not compiled. "
+  fprintf(stderr,
          "Windows Imaging Component (WIC) support not compiled. "
          "Visual Studio and mingw-w64 builds support WIC. Make sure "
          "wincodec.h detection is working correctly if using autoconf "
          "and HAVE_WINCODEC_H is defined before building.\n");
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -26,9 +26,7 @@
 //------------------------------------------------------------------------------
-int SharpYuvGetVersion(void) {
+int SharpYuvGetVersion(void) { return SHARPYUV_VERSION; }
  return SHARPYUV_VERSION;
 }
 //------------------------------------------------------------------------------
 // Sharp RGB->YUV conversion
@@ -148,12 +146,9 @@ static WEBP_INLINE int Shift(int v, int shift) {
  return (shift >= 0) ? (v << shift) : (v >> -shift);
 }
-static void ImportOneRow(const uint8_t* const r_ptr,
+static void ImportOneRow(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
-                         const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr, int rgb_step,
-                         const uint8_t* const b_ptr,
+                         int rgb_bit_depth, int pic_width,
                         int rgb_step,
                         int rgb_bit_depth,
                         int pic_width,
                         fixed_y_t* const dst) {
  // Convert the rgb_step from a number of bytes to a number of uint8_t or
  // uint16_t values depending the bit depth.
@@ -181,13 +176,9 @@ static void ImportOneRow(const uint8_t* const r_ptr,
 }
 static void InterpolateTwoRows(const fixed_y_t* const best_y,
-                               const fixed_t* prev_uv,
+                               const fixed_t* prev_uv, const fixed_t* cur_uv,
-                               const fixed_t* cur_uv,
+                               const fixed_t* next_uv, int w, fixed_y_t* out1,
-                               const fixed_t* next_uv,
+                               fixed_y_t* out2, int rgb_bit_depth) {
                               int w,
                               fixed_y_t* out1,
                               fixed_y_t* out2,
                               int rgb_bit_depth) {
  const int uv_w = w >> 1;
  const int len = (w - 1) >> 1;  // length to filter
  int k = 3;
@@ -220,16 +211,16 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
 static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
                                         const int coeffs[4], int sfix) {
  const int srounder = 1 << (YUV_FIX + sfix - 1);
-  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
+  const int luma =
-                   coeffs[3] + srounder;
+      coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + coeffs[3] + srounder;
  return (luma >> (YUV_FIX + sfix));
 }
 static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
                            int u_stride, uint8_t* v_ptr, int v_stride,
-                            int rgb_bit_depth,
+                            int rgb_bit_depth, int yuv_bit_depth, int width,
-                            int yuv_bit_depth, int width, int height,
+                            int height,
                            const SharpYuvConversionMatrix* yuv_matrix) {
  int i, j;
  const fixed_t* const best_uv_base = best_uv;
@@ -335,9 +326,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
  assert(w > 0);
  assert(h > 0);
-  if (best_y_base == NULL || best_uv_base == NULL ||
+  if (best_y_base == NULL || best_uv_base == NULL || target_y_base == NULL ||
-      target_y_base == NULL || target_uv_base == NULL ||
+      target_uv_base == NULL || best_rgb_y == NULL || best_rgb_uv == NULL ||
      best_rgb_y == NULL || best_rgb_uv == NULL ||
      tmp_buffer == NULL) {
    ok = 0;
    goto End;
@@ -350,8 +340,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
    fixed_y_t* const src2 = tmp_buffer + 3 * w;
    // prepare two rows of input
-    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
+    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, src1);
                 src1);
    if (!is_last_row) {
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
                   rgb_step, rgb_bit_depth, width, src2);
@@ -390,8 +379,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
      {
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
-        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
+        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2,
-                           src1, src2, rgb_bit_depth);
+                           rgb_bit_depth);
        prev_uv = cur_uv;
        cur_uv = next_uv;
      }
@@ -424,7 +413,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
                        width, height, yuv_matrix);
- End:
+End:
  free(best_y_base);
  free(best_uv_base);
  free(target_y_base);
@@ -449,7 +438,9 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
    return;                                     \
  } while (0)
 #else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define LOCK_ACCESS do {} while (0)
+#define LOCK_ACCESS \
  do {              \
  } while (0)
 #define UNLOCK_ACCESS_AND_RETURN return
 #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
--- a/sharpyuv/sharpyuv_gamma.c
+++ b/sharpyuv/sharpyuv_gamma.c
@@ -67,8 +67,7 @@ void SharpYuvInitGammaTables(void) {
        } else {
          value = (1. + a) * pow(g, 1. / kGammaF) - a;
        }
-        kLinearToGammaTabS[v] =
+        kLinearToGammaTabS[v] = (uint32_t)(final_scale * value + 0.5);
            (uint32_t)(final_scale * value + 0.5);
      }
      // to prevent small rounding errors to cause read-overflow:
      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
--- a/sharpyuv/sharpyuv_neon.c
+++ b/sharpyuv/sharpyuv_neon.c
@@ -14,9 +14,9 @@
 #include "sharpyuv/sharpyuv_dsp.h"
 #if defined(WEBP_USE_NEON)
 #include <arm_neon.h>
 #include <assert.h>
 #include <stdlib.h>
 #include <arm_neon.h>
 static uint16_t clip_NEON(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
--- a/sharpyuv/sharpyuv_sse2.c
+++ b/sharpyuv/sharpyuv_sse2.c
@@ -15,7 +15,6 @@
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
 #include <stdlib.h>
 #include "src/dsp/cpu.h"
--- a/src/dec/alpha_dec.c
+++ b/src/dec/alpha_dec.c
@@ -79,8 +79,7 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
  if (dec->method < ALPHA_NO_COMPRESSION ||
      dec->method > ALPHA_LOSSLESS_COMPRESSION ||
      dec->filter >= WEBP_FILTER_LAST ||
-      dec->pre_processing > ALPHA_PREPROCESSED_LEVELS ||
+      dec->pre_processing > ALPHA_PREPROCESSED_LEVELS || rsrv != 0) {
      rsrv != 0) {
    return 0;
  }
@@ -197,12 +196,12 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
        return NULL;
      }
      if (!AllocateAlphaPlane(dec, io)) goto Error;
-      if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size,
+      if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size, io,
-                    io, dec->alpha_plane)) {
+                    dec->alpha_plane)) {
        VP8LDecoder* const vp8l_dec = dec->alph_dec->vp8l_dec;
-        VP8SetError(dec,
+        VP8SetError(
-                    (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
+            dec,
-                                       : vp8l_dec->status,
+            (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY : vp8l_dec->status,
            "Alpha decoder initialization failed.");
        goto Error;
      }
@@ -222,12 +221,11 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
      ALPHDelete(dec->alph_dec);
      dec->alph_dec = NULL;
      if (dec->alpha_dithering > 0) {
-        uint8_t* const alpha = dec->alpha_plane + io->crop_top * width
+        uint8_t* const alpha =
-                             + io->crop_left;
+            dec->alpha_plane + io->crop_top * width + io->crop_left;
-        if (!WebPDequantizeLevels(alpha,
+        if (!WebPDequantizeLevels(alpha, io->crop_right - io->crop_left,
-                                  io->crop_right - io->crop_left,
+                                  io->crop_bottom - io->crop_top, width,
-                                  io->crop_bottom - io->crop_top,
+                                  dec->alpha_dithering)) {
                                  width, dec->alpha_dithering)) {
          goto Error;
        }
      }
@@ -237,7 +235,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
  // Return a pointer to the current decoded row.
  return dec->alpha_plane + row * width;
- Error:
+Error:
  WebPDeallocateAlphaMemory(dec);
  return NULL;
 }
--- a/src/dec/alphai_dec.h
+++ b/src/dec/alphai_dec.h
@@ -15,10 +15,10 @@
 #define WEBP_DEC_ALPHAI_DEC_H_
 #include "src/dec/vp8_dec.h"
 #include "src/webp/types.h"
 #include "src/dec/webpi_dec.h"
 #include "src/dsp/dsp.h"
 #include "src/utils/filters_utils.h"
 #include "src/webp/types.h"
 #ifdef __cplusplus
 extern "C" {
--- a/src/dec/buffer_dec.c
+++ b/src/dec/buffer_dec.c
@@ -26,10 +26,9 @@
 // WebPDecBuffer
 // Number of bytes per pixel for the different color-spaces.
-static const uint8_t kModeBpp[MODE_LAST] = {
+static const uint8_t kModeBpp[MODE_LAST] = {3, 4, 3, 4, 4, 2, 2,  //
  3, 4, 3, 4, 4, 2, 2,
                                            4, 4, 4, 2,  // pre-multiplied modes
-  1, 1 };
+                                            1, 1};
 // Convert to an integer to handle both the unsigned/signed enum cases
 // without the need for casting to remove type limit warnings.
@@ -202,8 +201,8 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
 #if !defined(WEBP_REDUCE_SIZE)
      int scaled_width = options->scaled_width;
      int scaled_height = options->scaled_height;
-      if (!WebPRescalerGetScaledDimensions(
+      if (!WebPRescalerGetScaledDimensions(width, height, &scaled_width,
-              width, height, &scaled_width, &scaled_height)) {
+                                           &scaled_height)) {
        return VP8_STATUS_INVALID_PARAM;
      }
      width = scaled_width;
@@ -289,8 +288,8 @@ VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
  } else {
    const WebPYUVABuffer* const src = &src_buf->u.YUVA;
    const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
-    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
+    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride, src_buf->width,
-                  src_buf->width, src_buf->height);
+                  src_buf->height);
    WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
                  (src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
    WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,
--- a/src/dec/common_dec.h
+++ b/src/dec/common_dec.h
@@ -15,7 +15,8 @@
 #define WEBP_DEC_COMMON_DEC_H_
 // intra prediction modes
-enum { B_DC_PRED = 0,   // 4x4 modes
+enum {
  B_DC_PRED = 0,  // 4x4 modes
  B_TM_PRED = 1,
  B_VE_PRED = 2,
  B_HE_PRED = 3,
@@ -28,8 +29,10 @@ enum { B_DC_PRED = 0,   // 4x4 modes
  NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
  // Luma16 or UV modes
-       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+  DC_PRED = B_DC_PRED,
-       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
+  V_PRED = B_VE_PRED,
  H_PRED = B_HE_PRED,
  TM_PRED = B_TM_PRED,
  B_PRED = NUM_BMODES,  // refined I4x4 mode
  NUM_PRED_MODES = 4,
@@ -37,9 +40,11 @@ enum { B_DC_PRED = 0,   // 4x4 modes
  B_DC_PRED_NOTOP = 4,
  B_DC_PRED_NOLEFT = 5,
  B_DC_PRED_NOTOPLEFT = 6,
-       NUM_B_DC_MODES = 7 };
+  NUM_B_DC_MODES = 7
 };
-enum { MB_FEATURE_TREE_PROBS = 3,
+enum {
  MB_FEATURE_TREE_PROBS = 3,
  NUM_MB_SEGMENTS = 4,
  NUM_REF_LF_DELTAS = 4,
  NUM_MODE_LF_DELTAS = 4,  // I4x4, ZERO, *, SPLIT
@@ -49,7 +54,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
  NUM_BANDS = 8,
  NUM_CTX = 3,
  NUM_PROBAS = 11
-     };
+};
 // Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
 int IsValidColorspace(int webp_csp_mode);
--- a/src/dec/frame_dec.c
+++ b/src/dec/frame_dec.c
@@ -33,8 +33,7 @@ static const uint16_t kScan[16] = {
    0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS,
    0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
    0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS,  12 + 8 * BPS,
-  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
+    0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS};
 };
 static int CheckMode(int mb_x, int mb_y, int mode) {
  if (mode == B_DC_PRED) {
@@ -209,7 +208,7 @@ static void ReconstructRow(const VP8Decoder* const dec,
 // Simple filter:  up to 2 luma samples are read and 1 is written.
 // Complex filter: up to 4 luma samples are read and 3 are written. Same for
 //                 U/V, so it's 8 samples total (because of the 2x upsampling).
-static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
+static const uint8_t kFilterExtraRows[3] = {0, 2, 8};
 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
  const VP8ThreadContext* const ctx = &dec->thread_ctx;
@@ -333,8 +332,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
 #define DITHER_AMP_TAB_SIZE 12
 static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
    // roughly, it's dqm->uv_mat[1]
-  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
+    8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1};
 };
 void VP8InitDithering(const WebPDecoderOptions* const options,
                      VP8Decoder* const dec) {
@@ -512,9 +510,9 @@ static int FinishRow(void* arg1, void* arg2) {
 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  int ok = 1;
  VP8ThreadContext* const ctx = &dec->thread_ctx;
-  const int filter_row =
+  const int filter_row = (dec->filter_type > 0) &&
-      (dec->filter_type > 0) &&
+                         (dec->mb_y >= dec->tl_mb_y) &&
-      (dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);
+                         (dec->mb_y <= dec->br_mb_y);
  if (dec->mt_method == 0) {
    // ctx->id and ctx->f_info are already set
    ctx->mb_y = dec->mb_y;
@@ -669,8 +667,8 @@ static int InitThreadContext(VP8Decoder* const dec) {
 }
 int VP8GetThreadMethod(const WebPDecoderOptions* const options,
-                       const WebPHeaderStructure* const headers,
+                       const WebPHeaderStructure* const headers, int width,
-                       int width, int height) {
+                       int height) {
  if (options == NULL || options->use_threads == 0) {
    return 0;
  }
@@ -698,22 +696,23 @@ static int AllocateMemory(VP8Decoder* const dec) {
  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
  const size_t f_info_size =
-      (dec->filter_type > 0) ?
+      (dec->filter_type > 0)
-          mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
+          ? mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
          : 0;
  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
  const size_t mb_data_size =
      (dec->mt_method == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data);
-  const size_t cache_height = (16 * num_caches
+  const size_t cache_height =
-                            + kFilterExtraRows[dec->filter_type]) * 3 / 2;
+      (16 * num_caches + kFilterExtraRows[dec->filter_type]) * 3 / 2;
  const size_t cache_size = top_size * cache_height;
  // alpha_size is the only one that scales as width x height.
-  const uint64_t alpha_size = (dec->alpha_data != NULL) ?
+  const uint64_t alpha_size =
-      (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;
+      (dec->alpha_data != NULL)
-  const uint64_t needed = (uint64_t)intra_pred_mode_size
+          ? (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height
-                        + top_size + mb_info_size + f_info_size
+          : 0ULL;
-                        + yuv_size + mb_data_size
+  const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size +
-                        + cache_size + alpha_size + WEBP_ALIGN_CST;
+                          mb_info_size + f_info_size + yuv_size + mb_data_size +
                          cache_size + alpha_size + WEBP_ALIGN_CST;
  uint8_t* mem;
  if (!CheckSizeOverflow(needed)) return 0;  // check for overflow
@@ -769,10 +768,10 @@ static int AllocateMemory(VP8Decoder* const dec) {
    const int extra_y = extra_rows * dec->cache_y_stride;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
    dec->cache_y = mem + extra_y;
-    dec->cache_u = dec->cache_y
+    dec->cache_u =
-                  + 16 * num_caches * dec->cache_y_stride + extra_uv;
+        dec->cache_y + 16 * num_caches * dec->cache_y_stride + extra_uv;
-    dec->cache_v = dec->cache_u
+    dec->cache_v =
-                  + 8 * num_caches * dec->cache_uv_stride + extra_uv;
+        dec->cache_u + 8 * num_caches * dec->cache_uv_stride + extra_uv;
    dec->cache_id = 0;
  }
  mem += cache_size;
--- a/src/dec/idec_dec.c
+++ b/src/dec/idec_dec.c
@@ -51,11 +51,7 @@ typedef enum {
 } DecState;
 // Operating state for the MemBuffer
-typedef enum {
+typedef enum { MEM_MODE_NONE = 0, MEM_MODE_APPEND, MEM_MODE_MAP } MemBufferMode;
  MEM_MODE_NONE = 0,
  MEM_MODE_APPEND,
  MEM_MODE_MAP
 } MemBufferMode;
 // storage for partition #0 and partial data (in a rolling fashion)
 typedef struct {
@@ -437,14 +433,14 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
  }
  // Allocate/Verify output buffer now
-  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
+  dec->status =
-                                      output);
+      WebPAllocateDecBuffer(io->width, io->height, params->options, output);
  if (dec->status != VP8_STATUS_OK) {
    return IDecError(idec, dec->status);
  }
  // This change must be done before calling VP8InitFrame()
-  dec->mt_method = VP8GetThreadMethod(params->options, NULL,
+  dec->mt_method =
-                                      io->width, io->height);
+      VP8GetThreadMethod(params->options, NULL, io->width, io->height);
  VP8InitDithering(params->options, dec);
  dec->status = CopyParts0Data(idec);
@@ -558,8 +554,8 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
    return ErrorStatusLossless(idec, dec->status);
  }
  // Allocate/verify output buffer now.
-  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
+  dec->status =
-                                      output);
+      WebPAllocateDecBuffer(io->width, io->height, params->options, output);
  if (dec->status != VP8_STATUS_OK) {
    return IDecError(idec, dec->status);
  }
@@ -584,7 +580,7 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
                                               : FinishDecoding(idec);
 }
-  // Main decoding loop
+// Main decoding loop
 static VP8StatusCode IDecode(WebPIDecoder* idec) {
  VP8StatusCode status = VP8_STATUS_SUSPENDED;
@@ -737,9 +733,9 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
 }
 WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
-                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
-                           uint8_t* v, size_t v_size, int v_stride,
+                           size_t v_size, int v_stride, uint8_t* a,
-                           uint8_t* a, size_t a_size, int a_stride) {
+                           size_t a_size, int a_stride) {
  const int is_external_memory = (luma != NULL) ? 1 : 0;
  WebPIDecoder* idec;
  WEBP_CSP_MODE colorspace;
@@ -780,12 +776,10 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
 }
 WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
-                          uint8_t* u, size_t u_size, int u_stride,
+                          uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
-                          uint8_t* v, size_t v_size, int v_stride) {
+                          size_t v_size, int v_stride) {
-  return WebPINewYUVA(luma, luma_size, luma_stride,
+  return WebPINewYUVA(luma, luma_size, luma_stride, u, u_size, u_stride, v,
-                      u, u_size, u_stride,
+                      v_size, v_stride, NULL, 0, 0);
                      v, v_size, v_stride,
                      NULL, 0, 0);
 }
 //------------------------------------------------------------------------------
@@ -801,8 +795,8 @@ static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
  return VP8_STATUS_SUSPENDED;
 }
-VP8StatusCode WebPIAppend(WebPIDecoder* idec,
+VP8StatusCode WebPIAppend(WebPIDecoder* idec, const uint8_t* data,
-                          const uint8_t* data, size_t data_size) {
+                          size_t data_size) {
  VP8StatusCode status;
  if (idec == NULL || data == NULL) {
    return VP8_STATUS_INVALID_PARAM;
@@ -822,8 +816,8 @@ VP8StatusCode WebPIAppend(WebPIDecoder* idec,
  return IDecode(idec);
 }
-VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec, const uint8_t* data,
-                          const uint8_t* data, size_t data_size) {
+                          size_t data_size) {
  VP8StatusCode status;
  if (idec == NULL || data == NULL) {
    return VP8_STATUS_INVALID_PARAM;
@@ -858,9 +852,8 @@ static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
  return idec->params.output;
 }
-const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec, int* left,
-                                      int* left, int* top,
+                                      int* top, int* width, int* height) {
                                      int* width, int* height) {
  const WebPDecBuffer* const src = GetOutputBuffer(idec);
  if (left != NULL) *left = 0;
  if (top != NULL) *top = 0;
@@ -913,10 +906,8 @@ WEBP_NODISCARD uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
  return src->u.YUVA.y;
 }
-int WebPISetIOHooks(WebPIDecoder* const idec,
+int WebPISetIOHooks(WebPIDecoder* const idec, VP8IoPutHook put,
-                    VP8IoPutHook put,
+                    VP8IoSetupHook setup, VP8IoTeardownHook teardown,
                    VP8IoSetupHook setup,
                    VP8IoTeardownHook teardown,
                    void* user_data) {
  if (idec == NULL || idec->state > STATE_WEBP_HEADER) {
    return 0;
--- a/src/dec/io_dec.c
+++ b/src/dec/io_dec.c
@@ -17,7 +17,6 @@
 #include <string.h>
 #include "src/dec/vp8_dec.h"
 #include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/webpi_dec.h"
 #include "src/dsp/cpu.h"
@@ -26,6 +25,7 @@
 #include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/decode.h"
 #include "src/webp/types.h"
 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -51,9 +51,8 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
  WebPDecBuffer* const output = p->output;
  WebPRGBABuffer* const buf = &output->u.RGBA;
  uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
-  WebPSamplerProcessPlane(io->y, io->y_stride,
+  WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst,
-                          io->u, io->v, io->uv_stride,
+                          buf->stride, io->mb_w, io->mb_h,
                          dst, buf->stride, io->mb_w, io->mb_h,
                          WebPSamplers[output->colorspace]);
  return io->mb_h;
 }
@@ -82,8 +81,8 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
  } else {
    // We can finish the left-over line from previous call.
-    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
+    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride,
-             dst - buf->stride, dst, mb_w);
+             dst, mb_w);
    ++num_lines_out;
  }
  // Loop over each output pairs of row.
@@ -94,8 +93,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
    cur_v += io->uv_stride;
    dst += 2 * buf->stride;
    cur_y += 2 * io->y_stride;
-    upsample(cur_y - io->y_stride, cur_y,
+    upsample(cur_y - io->y_stride, cur_y, top_u, top_v, cur_u, cur_v,
             top_u, top_v, cur_u, cur_v,
             dst - buf->stride, dst, mb_w);
  }
  // move to last row
@@ -111,8 +109,8 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
  } else {
    // Process the very last row of even-sized picture
    if (!(y_end & 1)) {
-      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst + buf->stride, NULL,
-               dst + buf->stride, NULL, mb_w);
+               mb_w);
    }
  }
  return num_lines_out;
@@ -153,8 +151,8 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
  return 0;
 }
-static int GetAlphaSourceRow(const VP8Io* const io,
+static int GetAlphaSourceRow(const VP8Io* const io, const uint8_t** alpha,
-                             const uint8_t** alpha, int* const num_rows) {
+                             int* const num_rows) {
  int start_y = io->mb_y;
  *num_rows = io->mb_h;
@@ -192,14 +190,14 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
    uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
    uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
-    const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
+    const int has_alpha =
-                                            num_rows, dst, buf->stride);
+        WebPDispatchAlpha(alpha, io->width, mb_w, num_rows, dst, buf->stride);
    (void)expected_num_lines_out;
    assert(expected_num_lines_out == num_rows);
    // has_alpha is true if there's non-trivial alpha to premultiply with.
    if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
-      WebPApplyAlphaMultiply(base_rgba, alpha_first,
+      WebPApplyAlphaMultiply(base_rgba, alpha_first, mb_w, num_rows,
-                             mb_w, num_rows, buf->stride);
+                             buf->stride);
    }
  }
  return 0;
@@ -245,8 +243,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
 // YUV rescaling (no final RGB conversion needed)
 #if !defined(WEBP_REDUCE_SIZE)
-static int Rescale(const uint8_t* src, int src_stride,
+static int Rescale(const uint8_t* src, int src_stride, int new_lines,
-                   int new_lines, WebPRescaler* const wrk) {
+                   WebPRescaler* const wrk) {
  int num_lines_out = 0;
  while (new_lines > 0) {  // import new contributions of source rows.
    const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
@@ -267,8 +265,8 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
    // internal buffer. This is OK since these samples are not used for
    // intra-prediction (the top samples are saved in cache_y/u/v).
    // But we need to cast the const away, though.
-    WebPMultRows((uint8_t*)io->y, io->y_stride,
+    WebPMultRows((uint8_t*)io->y, io->y_stride, io->a, io->width, io->mb_w,
-                 io->a, io->width, io->mb_w, mb_h, 0);
+                 mb_h, 0);
  }
  num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
  Rescale(io->u, io->uv_stride, uv_mb_h, p->scaler_u);
@@ -331,29 +329,28 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
  }
  work = (rescaler_t*)p->memory;
-  scalers = (WebPRescaler*)WEBP_ALIGN(
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
-      (const uint8_t*)work + total_size - rescaler_size);
+                                      rescaler_size);
  p->scaler_y = &scalers[0];
  p->scaler_u = &scalers[1];
  p->scaler_v = &scalers[2];
  p->scaler_a = has_alpha ? &scalers[3] : NULL;
-  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width,
-                        buf->y, out_width, out_height, buf->y_stride, 1,
+                        out_height, buf->y_stride, 1, work) ||
-                        work) ||
+      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, buf->u,
-      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
+                        uv_out_width, uv_out_height, buf->u_stride, 1,
                        buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
                        work + work_size) ||
-      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
+      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, buf->v,
-                        buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+                        uv_out_width, uv_out_height, buf->v_stride, 1,
                        work + work_size + uv_work_size)) {
    return 0;
  }
  p->emit = EmitRescaledYUV;
  if (has_alpha) {
-    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width,
-                          buf->a, out_width, out_height, buf->a_stride, 1,
+                          out_height, buf->a_stride, 1,
                          work + work_size + 2 * uv_work_size)) {
      return 0;
    }
@@ -381,8 +378,8 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
    WebPRescalerExportRow(p->scaler_y);
    WebPRescalerExportRow(p->scaler_u);
    WebPRescalerExportRow(p->scaler_v);
-    convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst,
+    convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst, dst,
-            dst, p->scaler_y->dst_width);
+            p->scaler_y->dst_width);
    dst += buf->stride;
    ++num_lines_out;
  }
@@ -419,8 +416,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
  uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
  const WEBP_CSP_MODE colorspace = p->output->colorspace;
-  const int alpha_first =
+  const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb);
      (colorspace == MODE_ARGB || colorspace == MODE_Argb);
  uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
  int num_lines_out = 0;
  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
@@ -436,8 +432,8 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
    ++num_lines_out;
  }
  if (is_premult_alpha && non_opaque) {
-    WebPApplyAlphaMultiply(base_rgba, alpha_first,
+    WebPApplyAlphaMultiply(base_rgba, alpha_first, width, num_lines_out,
-                           width, num_lines_out, buf->stride);
+                           buf->stride);
  }
  return num_lines_out;
 }
@@ -524,16 +520,15 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
  work = (rescaler_t*)p->memory;
  tmp = (uint8_t*)(work + tmp_size1);
-  scalers = (WebPRescaler*)WEBP_ALIGN(
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
-      (const uint8_t*)work + total_size - rescaler_size);
+                                      rescaler_size);
  p->scaler_y = &scalers[0];
  p->scaler_u = &scalers[1];
  p->scaler_v = &scalers[2];
  p->scaler_a = has_alpha ? &scalers[3] : NULL;
-  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width,
-                        tmp + 0 * out_width, out_width, out_height, 0, 1,
+                        out_width, out_height, 0, 1, work + 0 * work_size) ||
                        work + 0 * work_size) ||
      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
                        tmp + 1 * out_width, out_width, out_height, 0, 1,
                        work + 1 * work_size) ||
@@ -546,9 +541,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
  WebPInitYUV444Converters();
  if (has_alpha) {
-    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width,
-                          tmp + 3 * out_width, out_width, out_height, 0, 1,
+                          out_width, out_height, 0, 1, work + 3 * work_size)) {
                          work + 3 * work_size)) {
      return 0;
    }
    p->emit_alpha = EmitRescaledAlphaRGB;
@@ -616,8 +610,8 @@ static int CustomSetup(VP8Io* io) {
    }
    if (is_alpha) {  // need transparency output
      p->emit_alpha =
-          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
+          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444)
-              EmitAlphaRGBA4444
+              ? EmitAlphaRGBA4444
          : is_rgb ? EmitAlphaRGB
                   : EmitAlphaYUV;
      if (is_rgb) {
--- a/src/dec/quant_dec.c
+++ b/src/dec/quant_dec.c
@@ -17,48 +17,30 @@
 #include "src/utils/bit_reader_utils.h"
 #include "src/webp/types.h"
-static WEBP_INLINE int clip(int v, int M) {
+static WEBP_INLINE int clip(int v, int M) { return v < 0 ? 0 : v > M ? M : v; }
  return v < 0 ? 0 : v > M ? M : v;
 }
 // Paragraph 14.1
 static const uint8_t kDcTable[128] = {
-  4,     5,   6,   7,   8,   9,  10,  10,
+    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,
-  11,   12,  13,  14,  15,  16,  17,  17,
+    17,  18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,
-  18,   19,  20,  20,  21,  21,  22,  22,
+    27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,
-  23,   23,  24,  25,  25,  26,  27,  28,
+    41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,
-  29,   30,  31,  32,  33,  34,  35,  36,
+    55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
-  37,   37,  38,  39,  40,  41,  42,  43,
+    70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,  82,  83,
-  44,   45,  46,  46,  47,  48,  49,  50,
+    84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104,
-  51,   52,  53,  54,  55,  56,  57,  58,
+    106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
-  59,   60,  61,  62,  63,  64,  65,  66,
+    138, 140, 143, 145, 148, 151, 154, 157};
  67,   68,  69,  70,  71,  72,  73,  74,
  75,   76,  76,  77,  78,  79,  80,  81,
  82,   83,  84,  85,  86,  87,  88,  89,
  91,   93,  95,  96,  98, 100, 101, 102,
  104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136,
  138, 140, 143, 145, 148, 151, 154, 157
 };
 static const uint16_t kAcTable[128] = {
-  4,     5,   6,   7,   8,   9,  10,  11,
+    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,
-  12,   13,  14,  15,  16,  17,  18,  19,
+    19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,
-  20,   21,  22,  23,  24,  25,  26,  27,
+    34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
-  28,   29,  30,  31,  32,  33,  34,  35,
+    49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,
-  36,   37,  38,  39,  40,  41,  42,  43,
+    70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,
-  44,   45,  46,  47,  48,  49,  50,  51,
+    100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
-  52,   53,  54,  55,  56,  57,  58,  60,
+    137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
-  62,   64,  66,  68,  70,  72,  74,  76,
+    185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
-  78,   80,  82,  84,  86,  88,  90,  92,
+    249, 254, 259, 264, 269, 274, 279, 284};
  94,   96,  98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128,
  131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177,
  181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245,
  249, 254, 259, 264, 269, 274, 279, 284
 };
 //------------------------------------------------------------------------------
 // Paragraph 9.6
@@ -66,16 +48,21 @@ static const uint16_t kAcTable[128] = {
 void VP8ParseQuant(VP8Decoder* const dec) {
  VP8BitReader* const br = &dec->br;
  const int base_q0 = VP8GetValue(br, 7, "global-header");
-  const int dqy1_dc = VP8Get(br, "global-header") ?
+  const int dqy1_dc = VP8Get(br, "global-header")
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+                          ? VP8GetSignedValue(br, 4, "global-header")
-  const int dqy2_dc = VP8Get(br, "global-header") ?
+                          : 0;
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+  const int dqy2_dc = VP8Get(br, "global-header")
-  const int dqy2_ac = VP8Get(br, "global-header") ?
+                          ? VP8GetSignedValue(br, 4, "global-header")
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+                          : 0;
-  const int dquv_dc = VP8Get(br, "global-header") ?
+  const int dqy2_ac = VP8Get(br, "global-header")
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+                          ? VP8GetSignedValue(br, 4, "global-header")
-  const int dquv_ac = VP8Get(br, "global-header") ?
+                          : 0;
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+  const int dquv_dc = VP8Get(br, "global-header")
                          ? VP8GetSignedValue(br, 4, "global-header")
                          : 0;
  const int dquv_ac = VP8Get(br, "global-header")
                          ? VP8GetSignedValue(br, 4, "global-header")
                          : 0;
  const VP8SegmentHeader* const hdr = &dec->segment_hdr;
  int i;
--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@@ -14,16 +14,15 @@
 #include <string.h>
 #include "src/dec/common_dec.h"
 #include "src/webp/types.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/cpu.h"
 #include "src/utils/bit_reader_inl_utils.h"
 #include "src/utils/bit_reader_utils.h"
 #include "src/webp/types.h"
 #if !defined(USE_GENERIC_TREE)
-#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
+#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && !defined(__wasm__)
    !defined(__wasm__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
 #define USE_GENERIC_TREE 1  // ALTERNATE_CODE
 #else
@@ -33,269 +32,227 @@
 #if (USE_GENERIC_TREE == 1)
 static const int8_t kYModesIntra4[18] = {
-  -B_DC_PRED, 1,
+    -B_DC_PRED, 1, -B_TM_PRED, 2, -B_VE_PRED, 3,
-    -B_TM_PRED, 2,
+    4,          6, -B_HE_PRED, 5, -B_RD_PRED, -B_VR_PRED,
-      -B_VE_PRED, 3,
+    -B_LD_PRED, 7, -B_VL_PRED, 8, -B_HD_PRED, -B_HU_PRED};
        4, 6,
          -B_HE_PRED, 5,
            -B_RD_PRED, -B_VR_PRED,
        -B_LD_PRED, 7,
          -B_VL_PRED, 8,
            -B_HD_PRED, -B_HU_PRED
 };
 #endif
 //------------------------------------------------------------------------------
 // Default probabilities
 // Paragraph 13.5
-static const uint8_t
+static const uint8_t CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+    {{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+     {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
      {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
      {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
     {
         {1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
         {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
         {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
     },
-    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+     {
-      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+         {1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
-      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+         {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
         {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
     },
-    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+     {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
-      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+      {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
-      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+      {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
-    },
+     {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
-    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+      {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
-      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+      {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
-      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+     {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
-    },
+      {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
-    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+      {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
-      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+      {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-    },
+      {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
-    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+    {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
-      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+      {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
-      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+      {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
-    },
+     {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
-    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+      {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
-      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+      {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
-      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+     {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
-    },
+      {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
-    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
-      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+     {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
-      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+      {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
-    }
+      {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
-  },
+     {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
-  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+      {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
-      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+      {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
-      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+     {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
-    },
+      {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
-    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+      {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
-      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+     {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
-      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+      {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
-    },
+      {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
-    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+     {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
-      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+      {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
-      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+      {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
-    },
+    {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
-    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+      {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
-      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+      {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
-      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+     {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
-    },
+      {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
-    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+      {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
-      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+     {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
-      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+      {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
-    },
+      {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
-    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+     {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
-      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+      {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
-      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+      {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
-    },
+     {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
-    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+      {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
-      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+      {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
-      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+     {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-    },
+      {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+      {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
-      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+     {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
-      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+      {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
-    }
+      {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
-  },
+     {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
-      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+    {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
-    },
+      {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
-    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+      {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
-      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+     {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
-      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+      {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
-    },
+      {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
-    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+     {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
-      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+      {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
-      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+      {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
-    },
+     {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
-    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+      {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
-      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+      {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
-      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+     {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
-    },
+      {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
-    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
-      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+     {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
-      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+      {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
-    },
+      {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
-    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+     {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
-      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
-      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+      {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
-    },
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+      {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+      {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};
      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
    },
    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
    }
  },
  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
    },
    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
    },
    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
    },
    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
    },
    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
    },
    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
    },
    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
    },
    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
    }
  }
 };
 // Paragraph 11.5
 static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
-  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+    {{231, 120, 48, 89, 115, 113, 120, 152, 112},
-    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+     {152, 179, 64, 126, 170, 118, 46, 70, 95},
-    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+     {175, 69, 143, 80, 85, 82, 72, 155, 103},
-    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+     {56, 58, 10, 171, 218, 189, 17, 13, 152},
-    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+     {114, 26, 17, 163, 44, 195, 21, 10, 173},
-    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+     {121, 24, 80, 195, 26, 62, 44, 64, 85},
-    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+     {144, 71, 10, 38, 171, 213, 144, 34, 26},
-    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+     {170, 46, 55, 19, 136, 160, 33, 206, 71},
-    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+     {63, 20, 8, 114, 114, 208, 12, 9, 226},
-    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
+     {81, 40, 11, 96, 182, 84, 29, 16, 36}},
-  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+    {{134, 183, 89, 137, 98, 101, 106, 165, 148},
-    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+     {72, 187, 100, 130, 157, 111, 32, 75, 80},
-    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+     {66, 102, 167, 99, 74, 62, 40, 234, 128},
-    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+     {41, 53, 9, 178, 241, 141, 26, 8, 107},
-    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+     {74, 43, 26, 146, 73, 166, 49, 23, 157},
-    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+     {65, 38, 105, 160, 51, 52, 31, 115, 128},
-    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+     {104, 79, 12, 27, 217, 255, 87, 17, 7},
-    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+     {87, 68, 71, 44, 114, 51, 15, 186, 23},
-    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+     {47, 41, 14, 110, 182, 183, 21, 17, 194},
-    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
+     {66, 45, 25, 102, 197, 189, 23, 18, 22}},
-  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+    {{88, 88, 147, 150, 42, 46, 45, 196, 205},
-    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+     {43, 97, 183, 117, 85, 38, 35, 179, 61},
-    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+     {39, 53, 200, 87, 26, 21, 43, 232, 171},
-    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+     {56, 34, 51, 104, 114, 102, 29, 93, 77},
-    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+     {39, 28, 85, 171, 58, 165, 90, 98, 64},
-    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+     {34, 22, 116, 206, 23, 34, 43, 166, 73},
-    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+     {107, 54, 32, 26, 51, 1, 81, 43, 31},
-    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+     {68, 25, 106, 22, 64, 171, 36, 225, 114},
-    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+     {34, 19, 21, 102, 132, 188, 16, 76, 124},
-    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
+     {62, 18, 78, 95, 85, 57, 50, 48, 51}},
-  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+    {{193, 101, 35, 159, 215, 111, 89, 46, 111},
-    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+     {60, 148, 31, 172, 219, 228, 21, 18, 111},
-    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+     {112, 113, 77, 85, 179, 255, 38, 120, 114},
-    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+     {40, 42, 1, 196, 245, 209, 10, 25, 109},
-    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+     {88, 43, 29, 140, 166, 213, 37, 43, 154},
-    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+     {61, 63, 30, 155, 67, 45, 68, 1, 209},
-    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+     {100, 80, 8, 43, 154, 1, 51, 26, 71},
-    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+     {142, 78, 78, 16, 255, 128, 34, 197, 171},
-    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+     {41, 40, 5, 102, 211, 183, 4, 1, 221},
-    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
+     {51, 50, 17, 168, 209, 192, 23, 25, 82}},
-  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+    {{138, 31, 36, 171, 27, 166, 38, 44, 229},
-    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+     {67, 87, 58, 169, 82, 115, 26, 59, 179},
-    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+     {63, 59, 90, 180, 59, 166, 93, 73, 154},
-    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+     {40, 40, 21, 116, 143, 209, 34, 39, 175},
-    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+     {47, 15, 16, 183, 34, 223, 49, 45, 183},
-    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+     {46, 17, 33, 183, 6, 98, 15, 32, 183},
-    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+     {57, 46, 22, 24, 128, 1, 54, 17, 37},
-    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+     {65, 32, 73, 115, 28, 128, 23, 128, 205},
-    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+     {40, 3, 9, 115, 51, 192, 18, 6, 223},
-    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
+     {87, 37, 9, 115, 59, 77, 64, 21, 47}},
-  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+    {{104, 55, 44, 218, 9, 54, 53, 130, 226},
-    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+     {64, 90, 70, 205, 40, 41, 23, 26, 57},
-    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+     {54, 57, 112, 184, 5, 41, 38, 166, 213},
-    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+     {30, 34, 26, 133, 152, 116, 10, 32, 134},
-    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+     {39, 19, 53, 221, 26, 114, 32, 73, 255},
-    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+     {31, 9, 65, 234, 2, 15, 1, 118, 73},
-    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+     {75, 32, 12, 51, 192, 255, 160, 43, 51},
-    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+     {88, 31, 35, 67, 102, 85, 55, 186, 85},
-    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+     {56, 21, 23, 111, 59, 205, 45, 37, 192},
-    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
+     {55, 38, 70, 124, 73, 102, 1, 34, 98}},
-  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+    {{125, 98, 42, 88, 104, 85, 117, 175, 82},
-    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+     {95, 84, 53, 89, 128, 100, 113, 101, 45},
-    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+     {75, 79, 123, 47, 51, 128, 81, 171, 1},
-    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+     {57, 17, 5, 71, 102, 57, 53, 41, 49},
-    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+     {38, 33, 13, 121, 57, 73, 26, 1, 85},
-    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+     {41, 10, 67, 138, 77, 110, 90, 47, 114},
-    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+     {115, 21, 2, 10, 102, 255, 166, 23, 6},
-    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+     {101, 29, 16, 10, 85, 128, 101, 196, 26},
-    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+     {57, 18, 10, 102, 102, 213, 34, 20, 43},
-    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
+     {117, 20, 15, 36, 163, 128, 68, 1, 26}},
-  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+    {{102, 61, 71, 37, 34, 53, 31, 243, 192},
-    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+     {69, 60, 71, 38, 73, 119, 28, 222, 37},
-    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+     {68, 45, 128, 34, 1, 47, 11, 245, 171},
-    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+     {62, 17, 19, 70, 146, 85, 55, 62, 70},
-    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+     {37, 43, 37, 154, 100, 163, 85, 160, 1},
-    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+     {63, 9, 92, 136, 28, 64, 32, 201, 85},
-    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+     {75, 15, 9, 9, 64, 255, 184, 119, 16},
-    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+     {86, 6, 28, 5, 64, 255, 25, 248, 1},
-    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+     {56, 8, 17, 132, 137, 255, 55, 116, 128},
-    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
+     {58, 15, 20, 82, 135, 57, 26, 121, 40}},
-  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+    {{164, 50, 31, 137, 154, 133, 25, 35, 218},
-    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+     {51, 103, 44, 131, 131, 123, 31, 6, 158},
-    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+     {86, 40, 64, 135, 148, 224, 45, 183, 128},
-    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+     {22, 26, 17, 131, 240, 154, 14, 1, 209},
-    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+     {45, 16, 21, 91, 64, 222, 7, 1, 197},
-    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+     {56, 21, 39, 155, 60, 138, 23, 102, 213},
-    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+     {83, 12, 13, 54, 192, 255, 68, 47, 28},
-    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+     {85, 26, 85, 85, 128, 128, 32, 146, 171},
-    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+     {18, 11, 7, 63, 144, 171, 4, 4, 246},
-    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
+     {35, 27, 10, 146, 174, 171, 12, 26, 128}},
-  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+    {{190, 80, 35, 99, 180, 80, 126, 54, 45},
-    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+     {85, 126, 47, 87, 176, 51, 41, 20, 32},
-    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+     {101, 75, 128, 139, 118, 146, 116, 128, 85},
-    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+     {56, 41, 15, 176, 236, 85, 37, 9, 62},
-    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+     {71, 30, 17, 119, 118, 255, 17, 18, 138},
-    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+     {101, 38, 60, 138, 55, 70, 43, 26, 142},
-    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+     {146, 36, 19, 30, 171, 255, 97, 27, 20},
-    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+     {138, 45, 61, 62, 219, 1, 81, 188, 64},
-    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+     {32, 41, 20, 117, 151, 142, 20, 21, 163},
-    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
+     {112, 19, 12, 61, 195, 128, 48, 4, 24}}};
 };
 void VP8ResetProba(VP8Proba* const proba) {
  memset(proba->segments, 255u, sizeof(proba->segments));
  // proba->bands[][] is initialized later
 }
-static void ParseIntraMode(VP8BitReader* const br,
+static void ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec,
-                           VP8Decoder* const dec, int mb_x) {
+                           int mb_x) {
  uint8_t* const top = dec->intra_t + 4 * mb_x;
  uint8_t* const left = dec->intra_l;
  VP8MBData* const block = dec->mb_data + mb_x;
@@ -304,7 +261,8 @@ static void ParseIntraMode(VP8BitReader* const br,
  // to decode more than 1 keyframe.
  if (dec->segment_hdr.update_map) {
    // Hardcoded tree parsing
-    block->segment = !VP8GetBit(br, dec->proba.segments[0], "segments")
+    block->segment =
        !VP8GetBit(br, dec->proba.segments[0], "segments")
            ? VP8GetBit(br, dec->proba.segments[1], "segments")
            : VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
  } else {
@@ -316,9 +274,9 @@ static void ParseIntraMode(VP8BitReader* const br,
  if (!block->is_i4x4) {
    // Hardcoded 16x16 intra-mode decision tree.
    const int ymode =
-        VP8GetBit(br, 156, "pred-modes") ?
+        VP8GetBit(br, 156, "pred-modes")
-            (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
+            ? (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED)
-            (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
+            : (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
    block->imodes[0] = ymode;
    memset(top, ymode, 4 * sizeof(*top));
    memset(left, ymode, 4 * sizeof(*left));
@@ -339,18 +297,22 @@ static void ParseIntraMode(VP8BitReader* const br,
        ymode = -i;
 #else
        // Hardcoded tree parsing
-        ymode = !VP8GetBit(br, prob[0], "pred-modes") ? B_DC_PRED :
+        ymode =
-                  !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED :
+            !VP8GetBit(br, prob[0], "pred-modes")   ? B_DC_PRED
-                    !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED :
+            : !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED
-                      !VP8GetBit(br, prob[3], "pred-modes") ?
+            : !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED
-                        (!VP8GetBit(br, prob[4], "pred-modes") ? B_HE_PRED :
+            : !VP8GetBit(br, prob[3], "pred-modes")
-                          (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
+                ? (!VP8GetBit(br, prob[4], "pred-modes")
-                                                                 : B_VR_PRED)) :
+                       ? B_HE_PRED
-                        (!VP8GetBit(br, prob[6], "pred-modes") ? B_LD_PRED :
+                       : (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
-                          (!VP8GetBit(br, prob[7], "pred-modes") ? B_VL_PRED :
+                                                                : B_VR_PRED))
-                            (!VP8GetBit(br, prob[8], "pred-modes") ? B_HD_PRED
+                : (!VP8GetBit(br, prob[6], "pred-modes")
-                                                                   : B_HU_PRED))
+                       ? B_LD_PRED
-                        );
+                       : (!VP8GetBit(br, prob[7], "pred-modes")
                              ? B_VL_PRED
                              : (!VP8GetBit(br, prob[8], "pred-modes")
                                     ? B_HD_PRED
                                     : B_HU_PRED)));
 #endif  // USE_GENERIC_TREE
        top[x] = ymode;
      }
@@ -362,7 +324,8 @@ static void ParseIntraMode(VP8BitReader* const br,
  // Hardcoded UVMode decision tree
  block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv")   ? DC_PRED
                  : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
-                : VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
+                  : VP8GetBit(br, 183, "pred-modes-uv")  ? TM_PRED
                                                         : H_PRED;
 }
 int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
@@ -378,139 +341,102 @@ int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
 static const uint8_t
    CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        {{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
-    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
-    },
+          {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
-    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+        {{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+          {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
-    },
+         {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    }
+          {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-  },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+          {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
-    },
+        {{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
-      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
-      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-  },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
-      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
+        {{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
-      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
-    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+         {{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
-    },
+          {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+         {{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
-    },
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};
      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
    }
  },
  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
    },
    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
    }
  }
 };
 // Paragraph 9.9
@@ -527,9 +453,9 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
      for (c = 0; c < NUM_CTX; ++c) {
        for (p = 0; p < NUM_PROBAS; ++p) {
          const int v =
-              VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
+              VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header")
-                        VP8GetValue(br, 8, "global-header") :
+                  ? VP8GetValue(br, 8, "global-header")
-                        CoeffsProba0[t][b][c][p];
+                  : CoeffsProba0[t][b][c][p];
          proba->bands[t][b].probas[c][p] = v;
        }
      }
--- a/src/dec/vp8_dec.c
+++ b/src/dec/vp8_dec.c
@@ -11,13 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 #include "src/dec/vp8_dec.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include "src/dec/alphai_dec.h"
 #include "src/dec/common_dec.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/vp8li_dec.h"
 #include "src/dec/webpi_dec.h"
@@ -41,8 +42,8 @@ int WebPGetDecoderVersion(void) {
 // Signature and pointer-to-function for GetCoeffs() variants below.
 typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
-                             const VP8BandProbas* const prob[],
+                             const VP8BandProbas* const prob[], int ctx,
-                             int ctx, const quant_t dq, int n, int16_t* out);
+                             const quant_t dq, int n, int16_t* out);
 static volatile GetCoeffsFunc GetCoeffs = NULL;
 static void InitGetCoeffs(void);
@@ -95,8 +96,8 @@ void VP8Delete(VP8Decoder* const dec) {
  }
 }
-int VP8SetError(VP8Decoder* const dec,
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
-                VP8StatusCode error, const char* const msg) {
+                const char* const msg) {
  // VP8_STATUS_SUSPENDED is only meaningful in incremental decoding.
  assert(dec->incremental || error != VP8_STATUS_SUSPENDED);
  // The oldest error reported takes precedence over the new one.
@@ -111,8 +112,8 @@ int VP8SetError(VP8Decoder* const dec,
 //------------------------------------------------------------------------------
 int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
-  return (data_size >= 3 &&
+  return (data_size >= 3 && data[0] == 0x9d && data[1] == 0x01 &&
-          data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
+          data[2] == 0x2a);
 }
 int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
@@ -170,8 +171,8 @@ static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
 }
 // Paragraph 9.3
-static int ParseSegmentHeader(VP8BitReader* br,
+static int ParseSegmentHeader(VP8BitReader* br, VP8SegmentHeader* hdr,
-                              VP8SegmentHeader* hdr, VP8Proba* proba) {
+                              VP8Proba* proba) {
  assert(br != NULL);
  assert(hdr != NULL);
  hdr->use_segment = VP8Get(br, "global-header");
@@ -181,19 +182,23 @@ static int ParseSegmentHeader(VP8BitReader* br,
      int s;
      hdr->absolute_delta = VP8Get(br, "global-header");
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->quantizer[s] = VP8Get(br, "global-header") ?
+        hdr->quantizer[s] = VP8Get(br, "global-header")
-            VP8GetSignedValue(br, 7, "global-header") : 0;
+                                ? VP8GetSignedValue(br, 7, "global-header")
                                : 0;
      }
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->filter_strength[s] = VP8Get(br, "global-header") ?
+        hdr->filter_strength[s] =
-            VP8GetSignedValue(br, 6, "global-header") : 0;
+            VP8Get(br, "global-header")
                ? VP8GetSignedValue(br, 6, "global-header")
                : 0;
      }
    }
    if (hdr->update_map) {
      int s;
      for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
-        proba->segments[s] = VP8Get(br, "global-header") ?
+        proba->segments[s] = VP8Get(br, "global-header")
-            VP8GetValue(br, 8, "global-header") : 255u;
+                                 ? VP8GetValue(br, 8, "global-header")
                                 : 255u;
      }
    }
  } else {
@@ -211,8 +216,8 @@ static int ParseSegmentHeader(VP8BitReader* br,
 // If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
 // is returned, and this is an unrecoverable error.
 // If the partitions were positioned ok, VP8_STATUS_OK is returned.
-static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
+static VP8StatusCode ParsePartitions(VP8Decoder* const dec, const uint8_t* buf,
-                                     const uint8_t* buf, size_t size) {
+                                     size_t size) {
  VP8BitReader* const br = &dec->br;
  const uint8_t* sz = buf;
  const uint8_t* buf_end = buf + size;
@@ -290,8 +295,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  buf = io->data;
  buf_size = io->data_size;
  if (buf_size < 4) {
-    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Truncated header.");
                       "Truncated header.");
  }
  // Paragraph 9.1
@@ -322,8 +326,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
                         "cannot parse picture header");
    }
    if (!VP8CheckSignature(buf, buf_size)) {
-      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Bad code word");
                         "Bad code word");
    }
    pic_hdr->width = ((buf[4] << 8) | buf[3]) & 0x3fff;
    pic_hdr->xscale = buf[4] >> 6;  // ratio: 1, 5/4 5/3 or 2
@@ -360,8 +363,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  // Check if we have all the partition #0 available, and initialize dec->br
  // to read this partition (and this partition only).
  if (frm_hdr->partition_length > buf_size) {
-    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "bad partition length");
                       "bad partition length");
  }
  br = &dec->br;
@@ -392,8 +394,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  // Frame buffer marking
  if (!frm_hdr->key_frame) {
-    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
+    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Not a key frame.");
                       "Not a key frame.");
  }
  VP8Get(br, "global-header");  // ignore the value of 'update_proba'
@@ -408,15 +409,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 //------------------------------------------------------------------------------
 // Residual decoding (Paragraph 13.2 / 13.3)
-static const uint8_t kCat3[] = { 173, 148, 140, 0 };
+static const uint8_t kCat3[] = {173, 148, 140, 0};
-static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat4[] = {176, 155, 140, 135, 0};
-static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat5[] = {180, 157, 141, 134, 130, 0};
-static const uint8_t kCat6[] =
+static const uint8_t kCat6[] = {254, 254, 243, 230, 196, 177,
-  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+                                153, 140, 133, 130, 129, 0};
-static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t* const kCat3456[] = {kCat3, kCat4, kCat5, kCat6};
-static const uint8_t kZigzag[16] = {
+static const uint8_t kZigzag[16] = {0, 1,  4,  8,  5, 2,  3,  6,
-  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
+                                    9, 12, 13, 10, 7, 11, 14, 15};
 };
 // See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
@@ -452,8 +452,8 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
 // Returns the position of the last non-zero coeff plus one
 static int GetCoeffsFast(VP8BitReader* const br,
-                         const VP8BandProbas* const prob[],
+                         const VP8BandProbas* const prob[], int ctx,
-                         int ctx, const quant_t dq, int n, int16_t* out) {
+                         const quant_t dq, int n, int16_t* out) {
  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBit(br, p[0], "coeffs")) {
@@ -482,8 +482,8 @@ static int GetCoeffsFast(VP8BitReader* const br,
 // This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
 // of VP8GetBit() targeting specific platforms.
 static int GetCoeffsAlt(VP8BitReader* const br,
-                        const VP8BandProbas* const prob[],
+                        const VP8BandProbas* const prob[], int ctx,
-                        int ctx, const quant_t dq, int n, int16_t* out) {
+                        const quant_t dq, int n, int16_t* out) {
  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBitAlt(br, p[0], "coeffs")) {
@@ -525,10 +525,10 @@ static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
  return nz_coeffs;
 }
-static int ParseResiduals(VP8Decoder* const dec,
+static int ParseResiduals(VP8Decoder* const dec, VP8MB* const mb,
-                          VP8MB* const mb, VP8BitReader* const token_br) {
+                          VP8BitReader* const token_br) {
-  const VP8BandProbas* (* const bands)[16 + 1] = dec->proba.bands_ptr;
+  const VP8BandProbas*(*const bands)[16 + 1] = dec->proba.bands_ptr;
-  const VP8BandProbas* const * ac_proba;
+  const VP8BandProbas* const* ac_proba;
  VP8MBData* const block = dec->mb_data + dec->mb_x;
  const VP8QuantMatrix* const q = &dec->dqm[block->segment];
  int16_t* dst = block->coeffs;
@@ -542,7 +542,7 @@ static int ParseResiduals(VP8Decoder* const dec,
  memset(dst, 0, 384 * sizeof(*dst));
  if (!block->is_i4x4) {  // parse DC
-    int16_t dc[16] = { 0 };
+    int16_t dc[16] = {0};
    const int ctx = mb->nz_dc + left_mb->nz_dc;
    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat, 0, dc);
    mb->nz_dc = left_mb->nz_dc = (nz > 0);
--- a/src/dec/vp8_dec.h
+++ b/src/dec/vp8_dec.h
@@ -58,7 +58,7 @@ struct VP8Io {
  int mb_y;                  // position of the current rows (in pixels)
  int mb_w;                  // number of columns in the sample
  int mb_h;                  // number of rows in the sample
-  const uint8_t* y, *u, *v;  // rows to copy (in yuv420 format)
+  const uint8_t *y, *u, *v;  // rows to copy (in yuv420 format)
  int y_stride;              // row stride for luma
  int uv_stride;             // row stride for chroma
@@ -175,9 +175,10 @@ WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
 // Validates the VP8L data-header and retrieves basic header information viz
 // width, height and alpha. Returns 0 in case of formatting error.
 // width/height/has_alpha can be passed NULL.
-WEBP_EXTERN int VP8LGetInfo(
+WEBP_EXTERN int VP8LGetInfo(const uint8_t* data,
-    const uint8_t* data, size_t data_size,  // data available so far
+                            size_t data_size,  // data available so far
-    int* const width, int* const height, int* const has_alpha);
+                            int* const width, int* const height,
                            int* const has_alpha);
 #ifdef __cplusplus
 }  // extern "C"
--- a/src/dec/vp8i_dec.h
+++ b/src/dec/vp8i_dec.h
@@ -274,8 +274,8 @@ struct VP8Decoder {
 // internal functions. Not public.
 // in vp8.c
-int VP8SetError(VP8Decoder* const dec,
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
-                VP8StatusCode error, const char* const msg);
+                const char* const msg);
 // in tree.c
 void VP8ResetProba(VP8Proba* const proba);
@@ -299,8 +299,8 @@ WEBP_NODISCARD int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
 // Return the multi-threading method to use (0=off), depending
 // on options and bitstream size. Only for lossy decoding.
 int VP8GetThreadMethod(const WebPDecoderOptions* const options,
-                       const WebPHeaderStructure* const headers,
+                       const WebPHeaderStructure* const headers, int width,
-                       int width, int height);
+                       int height);
 // Initialize dithering post-process if needed.
 void VP8InitDithering(const WebPDecoderOptions* const options,
                      VP8Decoder* const dec);
@@ -314,8 +314,8 @@ WEBP_NODISCARD int VP8DecodeMB(VP8Decoder* const dec,
 // in alpha.c
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
-                                      const VP8Io* const io,
+                                      const VP8Io* const io, int row,
-                                      int row, int num_rows);
+                                      int num_rows);
 //------------------------------------------------------------------------------
--- a/src/dec/vp8l_dec.c
+++ b/src/dec/vp8l_dec.c
@@ -37,8 +37,8 @@
 static const int kCodeLengthLiterals = 16;
 static const int kCodeLengthRepeatCode = 16;
-static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
+static const uint8_t kCodeLengthExtraBits[3] = {2, 3, 7};
-static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+static const uint8_t kCodeLengthRepeatOffsets[3] = {3, 3, 11};
 // -----------------------------------------------------------------------------
 //  Five Huffman codes are used at each meta code:
@@ -47,44 +47,30 @@ static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
 //  3. red,
 //  4. blue, and,
 //  5. distance prefix codes.
-typedef enum {
+typedef enum { GREEN = 0, RED = 1, BLUE = 2, ALPHA = 3, DIST = 4 } HuffIndex;
  GREEN = 0,
  RED   = 1,
  BLUE  = 2,
  ALPHA = 3,
  DIST  = 4
 } HuffIndex;
 static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
-  NUM_LITERAL_CODES + NUM_LENGTH_CODES,
+    NUM_LITERAL_CODES + NUM_LENGTH_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
-  NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
+    NUM_LITERAL_CODES, NUM_DISTANCE_CODES};
  NUM_DISTANCE_CODES
 };
-static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {
+static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {0, 1, 1, 1, 0};
  0, 1, 1, 1, 0
 };
 #define NUM_CODE_LENGTH_CODES 19
 static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
-  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
 };
 #define CODE_TO_PLANE_CODES 120
 static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
-  0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+    0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, 0x26, 0x2a,
-  0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+    0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, 0x25, 0x2b, 0x48, 0x04,
-  0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+    0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45,
-  0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+    0x4b, 0x34, 0x3c, 0x03, 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d,
-  0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+    0x44, 0x4c, 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
-  0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+    0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, 0x32, 0x3e,
-  0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+    0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, 0x64, 0x6c, 0x42, 0x4e,
-  0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+    0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e,
-  0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+    0x00, 0x74, 0x7c, 0x41, 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d,
-  0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+    0x51, 0x5f, 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70};
  0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
  0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
 };
 // Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha
 // and distance alphabets are constant (256 for red, blue and alpha, 40 for
@@ -96,19 +82,10 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
 static const uint16_t kTableSize[12] = {
-  FIXED_TABLE_SIZE + 654,
+    FIXED_TABLE_SIZE + 654,  FIXED_TABLE_SIZE + 656,  FIXED_TABLE_SIZE + 658,
-  FIXED_TABLE_SIZE + 656,
+    FIXED_TABLE_SIZE + 662,  FIXED_TABLE_SIZE + 670,  FIXED_TABLE_SIZE + 686,
-  FIXED_TABLE_SIZE + 658,
+    FIXED_TABLE_SIZE + 718,  FIXED_TABLE_SIZE + 782,  FIXED_TABLE_SIZE + 912,
-  FIXED_TABLE_SIZE + 662,
+    FIXED_TABLE_SIZE + 1168, FIXED_TABLE_SIZE + 1680, FIXED_TABLE_SIZE + 2704};
  FIXED_TABLE_SIZE + 670,
  FIXED_TABLE_SIZE + 686,
  FIXED_TABLE_SIZE + 718,
  FIXED_TABLE_SIZE + 782,
  FIXED_TABLE_SIZE + 912,
  FIXED_TABLE_SIZE + 1168,
  FIXED_TABLE_SIZE + 1680,
  FIXED_TABLE_SIZE + 2704
 };
 static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
  // The oldest error reported takes precedence over the new one.
@@ -118,22 +95,19 @@ static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
  return 0;
 }
-static int DecodeImageStream(int xsize, int ysize,
+static int DecodeImageStream(int xsize, int ysize, int is_level0,
                             int is_level0,
                             VP8LDecoder* const dec,
                             uint32_t** const decoded_data);
 //------------------------------------------------------------------------------
 int VP8LCheckSignature(const uint8_t* const data, size_t size) {
-  return (size >= VP8L_FRAME_HEADER_SIZE &&
+  return (size >= VP8L_FRAME_HEADER_SIZE && data[0] == VP8L_MAGIC_BYTE &&
          data[0] == VP8L_MAGIC_BYTE &&
          (data[4] >> 5) == 0);  // version
 }
-static int ReadImageInfo(VP8LBitReader* const br,
+static int ReadImageInfo(VP8LBitReader* const br, int* const width,
-                         int* const width, int* const height,
+                         int* const height, int* const has_alpha) {
                         int* const has_alpha) {
  if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0;
  *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
  *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
@@ -142,8 +116,8 @@ static int ReadImageInfo(VP8LBitReader* const br,
  return !br->eos;
 }
-int VP8LGetInfo(const uint8_t* data, size_t data_size,
+int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width,
-                int* const width, int* const height, int* const has_alpha) {
+                int* const height, int* const has_alpha) {
  if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
    return 0;  // not enough data
  } else if (!VP8LCheckSignature(data, data_size)) {
@@ -262,8 +236,8 @@ static void BuildPackedTable(HTreeGroup* const htree_group) {
  }
 }
-static int ReadHuffmanCodeLengths(
+static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
-    VP8LDecoder* const dec, const int* const code_length_code_lengths,
+                                  const int* const code_length_code_lengths,
                                  int num_symbols, int* const code_lengths) {
  int ok = 0;
  VP8LBitReader* const br = &dec->br;
@@ -316,7 +290,7 @@ static int ReadHuffmanCodeLengths(
  }
  ok = 1;
- End:
+End:
  VP8LHuffmanTablesDeallocate(&tables);
  if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
  return ok;
@@ -348,7 +322,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
    ok = 1;
  } else {  // Decode Huffman-coded code lengths.
    int i;
-    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
+    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = {0};
    const int num_codes = VP8LReadBits(br, 4) + 4;
    assert(num_codes <= NUM_CODE_LENGTH_CODES);
@@ -361,8 +335,8 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
  ok = ok && !br->eos;
  if (ok) {
-    size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
+    size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, code_lengths,
-                                 code_lengths, alphabet_size);
+                                 alphabet_size);
  }
  if (!ok || size == 0) {
    return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
@@ -449,7 +423,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
  hdr->num_htree_groups = num_htree_groups;
  hdr->htree_groups = htree_groups;
- Error:
+Error:
  WebPSafeFree(mapping);
  if (!ok) {
    WebPSafeFree(huffman_image);
@@ -554,7 +528,7 @@ int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
  }
  ok = 1;
- Error:
+Error:
  WebPSafeFree(code_lengths);
  if (!ok) {
    VP8LHuffmanTablesDeallocate(huffman_tables);
@@ -595,8 +569,8 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
  scaled_data = (uint32_t*)memory;
  if (!WebPRescalerInit(dec->rescaler, in_width, in_height,
-                        (uint8_t*)scaled_data, out_width, out_height,
+                        (uint8_t*)scaled_data, out_width, out_height, 0,
-                        0, num_channels, work)) {
+                        num_channels, work)) {
    return 0;
  }
  return 1;
@@ -626,9 +600,9 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
 }
 // Emit scaled rows.
-static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
+static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, uint8_t* in,
-                                uint8_t* in, int in_stride, int mb_h,
+                                int in_stride, int mb_h, uint8_t* const out,
-                                uint8_t* const out, int out_stride) {
+                                int out_stride) {
  const WEBP_CSP_MODE colorspace = dec->output->colorspace;
  int num_lines_in = 0;
  int num_lines_out = 0;
@@ -639,8 +613,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
    int lines_imported;
    assert(needed_lines > 0 && needed_lines <= lines_left);
-    WebPMultARGBRows(row_in, in_stride,
+    WebPMultARGBRows(row_in, in_stride, dec->rescaler->src_width, needed_lines,
-                     dec->rescaler->src_width, needed_lines, 0);
+                     0);
    lines_imported =
        WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
    assert(lines_imported == needed_lines);
@@ -653,10 +627,9 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
 #endif  // WEBP_REDUCE_SIZE
 // Emit rows without any scaling.
-static int EmitRows(WEBP_CSP_MODE colorspace,
+static int EmitRows(WEBP_CSP_MODE colorspace, const uint8_t* row_in,
-                    const uint8_t* row_in, int in_stride,
+                    int in_stride, int mb_w, int mb_h, uint8_t* const out,
-                    int mb_w, int mb_h,
+                    int out_stride) {
                    uint8_t* const out, int out_stride) {
  int lines = mb_h;
  uint8_t* row_out = out;
  while (lines-- > 0) {
@@ -711,8 +684,8 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
  return num_lines_out;
 }
-static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, uint8_t* in,
-                                uint8_t* in, int in_stride, int mb_h) {
+                                int in_stride, int mb_h) {
  int num_lines_in = 0;
  int y_pos = dec->last_out_row;
  while (num_lines_in < mb_h) {
@@ -730,9 +703,8 @@ static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
  return y_pos;
 }
-static int EmitRowsYUVA(const VP8LDecoder* const dec,
+static int EmitRowsYUVA(const VP8LDecoder* const dec, const uint8_t* in,
-                        const uint8_t* in, int in_stride,
+                        int in_stride, int mb_w, int num_rows) {
                        int mb_w, int num_rows) {
  int y_pos = dec->last_out_row;
  while (num_rows-- > 0) {
    ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output);
@@ -774,8 +746,8 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
 //------------------------------------------------------------------------------
-static WEBP_INLINE int GetMetaIndex(
+static WEBP_INLINE int GetMetaIndex(const uint32_t* const image, int xsize,
-    const uint32_t* const image, int xsize, int bits, int x, int y) {
+                                    int bits, int x, int y) {
  if (bits == 0) return 0;
  return image[xsize * (y >> bits) + (x >> bits)];
 }
@@ -793,9 +765,8 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
 typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
-static void ApplyInverseTransforms(VP8LDecoder* const dec,
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int start_row,
-                                   int start_row, int num_rows,
+                                   int num_rows, const uint32_t* const rows) {
                                   const uint32_t* const rows) {
  int n = dec->next_transform;
  const int cache_pixs = dec->width * num_rows;
  const int end_row = start_row + num_rows;
@@ -839,18 +810,19 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
            buf->rgba + (ptrdiff_t)dec->last_out_row * buf->stride;
        const int num_rows_out =
 #if !defined(WEBP_REDUCE_SIZE)
-         io->use_scaling ?
+            io->use_scaling ? EmitRescaledRowsRGBA(dec, rows_data, in_stride,
-            EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
+                                                   io->mb_h, rgba, buf->stride)
-                                 rgba, buf->stride) :
+                            :
 #endif  // WEBP_REDUCE_SIZE
                            EmitRows(output->colorspace, rows_data, in_stride,
                                     io->mb_w, io->mb_h, rgba, buf->stride);
        // Update 'last_out_row'.
        dec->last_out_row += num_rows_out;
      } else {  // convert to YUVA
-        dec->last_out_row = io->use_scaling ?
+        dec->last_out_row =
-            EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
+            io->use_scaling
-            EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+                ? EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h)
                : EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
      }
      assert(dec->last_out_row <= output->height);
    }
@@ -877,9 +849,8 @@ static int Is8bOptimizable(const VP8LMetadata* const hdr) {
  return 1;
 }
-static void AlphaApplyFilter(ALPHDecoder* const alph_dec,
+static void AlphaApplyFilter(ALPHDecoder* const alph_dec, int first_row,
-                             int first_row, int last_row,
+                             int last_row, uint8_t* out, int stride) {
                             uint8_t* out, int stride) {
  if (alph_dec->filter != WEBP_FILTER_NONE) {
    int y;
    const uint8_t* prev_line = alph_dec->prev_line;
@@ -897,9 +868,9 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
  // For vertical and gradient filtering, we need to decode the part above the
  // crop_top row, in order to have the correct spatial predictors.
  ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io->opaque;
-  const int top_row =
+  const int top_row = (alph_dec->filter == WEBP_FILTER_NONE ||
-      (alph_dec->filter == WEBP_FILTER_NONE ||
+                       alph_dec->filter == WEBP_FILTER_HORIZONTAL)
-       alph_dec->filter == WEBP_FILTER_HORIZONTAL) ? dec->io->crop_top
+                          ? dec->io->crop_top
                          : dec->last_row;
  const int first_row = (dec->last_row < top_row) ? top_row : dec->last_row;
  assert(last_row <= dec->io->crop_bottom);
@@ -907,13 +878,12 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
    // Special method for paletted alpha data. We only process the cropped area.
    const int width = dec->io->width;
    uint8_t* out = alph_dec->output + width * first_row;
-    const uint8_t* const in =
+    const uint8_t* const in = (uint8_t*)dec->pixels + dec->width * first_row;
      (uint8_t*)dec->pixels + dec->width * first_row;
    VP8LTransform* const transform = &dec->transforms[0];
    assert(dec->next_transform == 1);
    assert(transform->type == COLOR_INDEXING_TRANSFORM);
-    VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row,
+    VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row, in,
-                                        in, out);
+                                        out);
    AlphaApplyFilter(alph_dec, first_row, last_row, out, width);
  }
  dec->last_row = dec->last_out_row = last_row;
@@ -963,7 +933,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
        pattern |= pattern << 8;
        pattern |= pattern << 16;
 #elif defined(WEBP_USE_MIPS_DSP_R2)
-        __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern));
+        __asm__ volatile("replv.qb %0, %0" : "+r"(pattern));
 #else
        pattern = 0x01010101u * pattern;
 #endif
@@ -977,7 +947,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
 #if defined(__arm__) || defined(_M_ARM)
        pattern |= pattern << 16;
 #elif defined(WEBP_USE_MIPS_DSP_R2)
-        __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern));
+        __asm__ volatile("replv.ph %0, %0" : "+r"(pattern));
 #else
        pattern = 0x00010001u * pattern;
 #endif
@@ -991,7 +961,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
    CopySmallPattern8b(src, dst, length, pattern);
    return;
  }
- Copy:
+Copy:
  if (dist >= length) {  // no overlap -> use memcpy()
    memcpy(dst, src, length * sizeof(*dst));
  } else {
@@ -1001,8 +971,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
 }
 // copy pattern of 1 or 2 uint32_t's
-static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src,
+static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, uint32_t* dst,
                                            uint32_t* dst,
                                            int length, uint64_t pattern) {
  int i;
  if ((uintptr_t)dst & 4) {  // Align 'dst' to 8-bytes boundary.
@@ -1019,8 +988,8 @@ static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src,
  }
 }
-static WEBP_INLINE void CopyBlock32b(uint32_t* const dst,
+static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, int dist,
-                                     int dist, int length) {
+                                     int length) {
  const uint32_t* const src = dst - dist;
  if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) {
    uint64_t pattern;
@@ -1114,7 +1083,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
  // Process the remaining rows corresponding to last row-block.
  ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
- End:
+End:
  br->eos = VP8LIsEndOfStream(br);
  if (!ok || (br->eos && pos < end)) {
    return VP8LSetError(
@@ -1297,7 +1266,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
  }
  return 1;
- Error:
+Error:
  return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
 }
@@ -1314,9 +1283,8 @@ static void ClearTransform(VP8LTransform* const transform) {
 static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
  int i;
  const int final_num_colors = 1 << (8 >> transform->bits);
-  uint32_t* const new_color_map =
+  uint32_t* const new_color_map = (uint32_t*)WebPSafeMalloc(
-      (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
+      (uint64_t)final_num_colors, sizeof(*new_color_map));
                                sizeof(*new_color_map));
  if (new_color_map == NULL) {
    return 0;
  } else {
@@ -1362,10 +1330,9 @@ static int ReadTransform(int* const xsize, int const* ysize,
    case CROSS_COLOR_TRANSFORM:
      transform->bits =
          MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
-      ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize,
+      ok = DecodeImageStream(
-                                               transform->bits),
+          VP8LSubSampleSize(transform->xsize, transform->bits),
-                             VP8LSubSampleSize(transform->ysize,
+          VP8LSubSampleSize(transform->ysize, transform->bits),
                                               transform->bits),
          /*is_level0=*/0, dec, &transform->data);
      break;
    case COLOR_INDEXING_TRANSFORM: {
@@ -1464,8 +1431,7 @@ static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
  hdr->huffman_mask = (num_bits == 0) ? ~0 : (1 << num_bits) - 1;
 }
-static int DecodeImageStream(int xsize, int ysize,
+static int DecodeImageStream(int xsize, int ysize, int is_level0,
                             int is_level0,
                             VP8LDecoder* const dec,
                             uint32_t** const decoded_data) {
  int ok = 1;
@@ -1532,7 +1498,7 @@ static int DecodeImageStream(int xsize, int ysize,
                       transform_ysize, NULL);
  ok = ok && !br->eos;
- End:
+End:
  if (!ok) {
    WebPSafeFree(data);
    ClearMetadata(hdr);
@@ -1604,8 +1570,8 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
    const uint32_t* const src = dec->argb_cache;
    ApplyInverseTransforms(dec, cur_row, num_rows_to_process, in);
    WebPExtractGreen(src, dst, cache_pixs);
-    AlphaApplyFilter(alph_dec,
+    AlphaApplyFilter(alph_dec, cur_row, cur_row + num_rows_to_process, dst,
-                     cur_row, cur_row + num_rows_to_process, dst, width);
+                     width);
    num_rows -= num_rows_to_process;
    in += num_rows_to_process * dec->width;
    cur_row += num_rows_to_process;
@@ -1658,7 +1624,7 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
  alph_dec->vp8l_dec = dec;
  return 1;
- Err:
+Err:
  VP8LDelete(dec);
  return 0;
 }
@@ -1675,10 +1641,10 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
  if (!alph_dec->use_8b_decode) WebPInitAlphaProcessing();
  // Decode (with special row processing).
-  return alph_dec->use_8b_decode ?
+  return alph_dec->use_8b_decode
-      DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width, dec->height,
+             ? DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width,
-                      last_row) :
+                               dec->height, last_row)
-      DecodeImageData(dec, dec->pixels, dec->width, dec->height,
+             : DecodeImageData(dec, dec->pixels, dec->width, dec->height,
                               last_row, ExtractAlphaRows);
 }
@@ -1709,7 +1675,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
  }
  return 1;
- Error:
+Error:
  VP8LClear(dec);
  assert(dec->status != VP8_STATUS_OK);
  return 0;
@@ -1781,7 +1747,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
  params->last_y = dec->last_out_row;
  return 1;
- Err:
+Err:
  VP8LClear(dec);
  assert(dec->status != VP8_STATUS_OK);
  return 0;
--- a/src/dec/vp8li_dec.h
+++ b/src/dec/vp8li_dec.h
@@ -31,11 +31,7 @@
 extern "C" {
 #endif
-typedef enum {
+typedef enum { READ_DATA = 0, READ_HDR = 1, READ_DIM = 2 } VP8LDecodeState;
  READ_DATA = 0,
  READ_HDR = 1,
  READ_DIM = 2
 } VP8LDecodeState;
 typedef struct VP8LTransform VP8LTransform;
 struct VP8LTransform {
--- a/src/dec/webp_dec.c
+++ b/src/dec/webp_dec.c
@@ -99,8 +99,7 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
 // *height_ptr and *flags_ptr are set to the corresponding values extracted
 // from the VP8X chunk.
 static VP8StatusCode ParseVP8X(const uint8_t** const data,
-                               size_t* const data_size,
+                               size_t* const data_size, int* const found_vp8x,
                               int* const found_vp8x,
                               int* const width_ptr, int* const height_ptr,
                               uint32_t* const flags_ptr) {
  const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
@@ -199,8 +198,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
    // parsed all the optional chunks.
    // Note: This check must occur before the check 'buf_size < disk_chunk_size'
    // below to allow incomplete VP8/VP8L chunks.
-    if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+    if (!memcmp(buf, "VP8 ", TAG_SIZE) || !memcmp(buf, "VP8L", TAG_SIZE)) {
        !memcmp(buf, "VP8L", TAG_SIZE)) {
      return VP8_STATUS_OK;
    }
@@ -282,10 +280,8 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
 // RIFF + VP8X + (optional chunks) + VP8(L)
 // ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
 // VP8(L)     <-- Not a valid WebP format: only allowed for internal purpose.
-static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data, size_t data_size,
-                                          size_t data_size,
+                                          int* const width, int* const height,
                                          int* const width,
                                          int* const height,
                                          int* const has_alpha,
                                          int* const has_animation,
                                          int* const format,
@@ -319,8 +315,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
  // Skip over VP8X.
  {
    uint32_t flags = 0;
-    status = ParseVP8X(&data, &data_size, &found_vp8x,
+    status = ParseVP8X(&data, &data_size, &found_vp8x, &canvas_width,
-                       &canvas_width, &canvas_height, &flags);
+                       &canvas_height, &flags);
    if (status != VP8_STATUS_OK) {
      return status;  // Wrong VP8X / insufficient data.
    }
@@ -403,7 +399,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
    assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
    assert(headers->offset == headers->data_size - data_size);
  }
- ReturnWidthHeight:
+ReturnWidthHeight:
  if (status == VP8_STATUS_OK ||
      (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
    if (has_alpha != NULL) {
@@ -425,9 +421,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
  int has_animation = 0;
  assert(headers != NULL);
  // fill out headers, ignore width/height/has_alpha.
-  status = ParseHeadersInternal(headers->data, headers->data_size,
+  status = ParseHeadersInternal(headers->data, headers->data_size, NULL, NULL,
-                                NULL, NULL, NULL, &has_animation,
+                                NULL, &has_animation, NULL, headers);
                                NULL, headers);
  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
    // The WebPDemux API + libwebp can be used to decode individual
    // uncomposited frames or the WebPAnimDecoder can be used to fully
@@ -492,8 +487,8 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
                                     params->output);
      if (status == VP8_STATUS_OK) {  // Decode
        // This change must be done before calling VP8Decode()
-        dec->mt_method = VP8GetThreadMethod(params->options, &headers,
+        dec->mt_method =
-                                            io.width, io.height);
+            VP8GetThreadMethod(params->options, &headers, io.width, io.height);
        VP8InitDithering(params->options, dec);
        if (!VP8Decode(dec, &io)) {
          status = dec->status;
@@ -582,10 +577,10 @@ uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
  return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
 }
-uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size, uint8_t* luma,
-                           uint8_t* luma, size_t luma_size, int luma_stride,
+                           size_t luma_size, int luma_stride, uint8_t* u,
-                           uint8_t* u, size_t u_size, int u_stride,
+                           size_t u_size, int u_stride, uint8_t* v,
-                           uint8_t* v, size_t v_size, int v_stride) {
+                           size_t v_size, int v_stride) {
  WebPDecParams params;
  WebPDecBuffer output;
  if (luma == NULL || !WebPInitDecBuffer(&output)) return NULL;
@@ -643,34 +638,34 @@ WEBP_NODISCARD static uint8_t* Decode(WEBP_CSP_MODE mode,
  return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
 }
-uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, int* width,
-                       int* width, int* height) {
+                       int* height) {
  return Decode(MODE_RGB, data, data_size, width, height, NULL);
 }
-uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size, int* width,
-                        int* width, int* height) {
+                        int* height) {
  return Decode(MODE_RGBA, data, data_size, width, height, NULL);
 }
-uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size, int* width,
-                        int* width, int* height) {
+                        int* height) {
  return Decode(MODE_ARGB, data, data_size, width, height, NULL);
 }
-uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, int* width,
-                       int* width, int* height) {
+                       int* height) {
  return Decode(MODE_BGR, data, data_size, width, height, NULL);
 }
-uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, int* width,
-                        int* width, int* height) {
+                        int* height) {
  return Decode(MODE_BGRA, data, data_size, width, height, NULL);
 }
-uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
+uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width,
-                       int* width, int* height, uint8_t** u, uint8_t** v,
+                       int* height, uint8_t** u, uint8_t** v, int* stride,
-                       int* stride, int* uv_stride) {
+                       int* uv_stride) {
  // data, width and height are checked by Decode().
  if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
    return NULL;
@@ -678,8 +673,8 @@ uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
  {
    WebPDecBuffer output;  // only to preserve the side-infos
-    uint8_t* const out = Decode(MODE_YUV, data, data_size,
+    uint8_t* const out =
-                                width, height, &output);
+        Decode(MODE_YUV, data, data_size, width, height, &output);
    if (out != NULL) {
      const WebPYUVABuffer* const buf = &output.u.YUVA;
@@ -706,17 +701,16 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
  DefaultFeatures(features);
  // Only parse enough of the data to retrieve the features.
-  return ParseHeadersInternal(data, data_size,
+  return ParseHeadersInternal(
-                              &features->width, &features->height,
+      data, data_size, &features->width, &features->height,
-                              &features->has_alpha, &features->has_animation,
+      &features->has_alpha, &features->has_animation, &features->format, NULL);
                              &features->format, NULL);
 }
 //------------------------------------------------------------------------------
 // WebPGetInfo()
-int WebPGetInfo(const uint8_t* data, size_t data_size,
+int WebPGetInfo(const uint8_t* data, size_t data_size, int* width,
-                int* width, int* height) {
+                int* height) {
  WebPBitstreamFeatures features;
  if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
@@ -736,8 +730,7 @@ int WebPGetInfo(const uint8_t* data, size_t data_size,
 //------------------------------------------------------------------------------
 // Advance decoding API
-int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, int version) {
                                  int version) {
  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
    return 0;  // version mismatch
  }
@@ -864,8 +857,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
 //------------------------------------------------------------------------------
 // Cropping and rescaling.
-int WebPCheckCropDimensions(int image_width, int image_height,
+int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
-                            int x, int y, int w, int h) {
+                            int w, int h) {
  return WebPCheckCropDimensionsBasic(x, y, w, h) &&
         !(x >= image_width || w > image_width || w > image_width - x ||
           y >= image_height || h > image_height || h > image_height - y);
@@ -921,8 +914,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
  if (io->use_scaling) {
    // disable filter (only for large downscaling ratio).
-    io->bypass_filtering |= (io->scaled_width < W * 3 / 4) &&
+    io->bypass_filtering |=
-                            (io->scaled_height < H * 3 / 4);
+        (io->scaled_width < W * 3 / 4) && (io->scaled_height < H * 3 / 4);
    io->fancy_upsampling = 0;
  }
  return 1;
--- a/src/dec/webpi_dec.h
+++ b/src/dec/webpi_dec.h
@@ -37,13 +37,13 @@ typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
 struct WebPDecParams {
  WebPDecBuffer* output;           // output buffer.
-  uint8_t* tmp_y, *tmp_u, *tmp_v;    // cache for the fancy upsampler
+  uint8_t *tmp_y, *tmp_u, *tmp_v;  // cache for the fancy upsampler
                                   // or used for tmp rescaling
  int last_y;  // coordinate of the line that was last output
  const WebPDecoderOptions* options;  // if not NULL, use alt decoding features
-  WebPRescaler* scaler_y, *scaler_u, *scaler_v, *scaler_a;  // rescalers
+  WebPRescaler *scaler_y, *scaler_u, *scaler_v, *scaler_a;  // rescalers
  void* memory;  // overall scratch memory for the output work.
  OutputFunc emit;               // output RGB or YUV samples
@@ -82,8 +82,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
 // Misc utils
 // Returns true if crop dimensions are within image bounds.
-int WebPCheckCropDimensions(int image_width, int image_height,
+int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
-                            int x, int y, int w, int h);
+                            int w, int h);
 // Initializes VP8Io with custom setup, io and teardown functions. The default
 // hooks will use the supplied 'params' as io->opaque handle.
--- a/src/demux/anim_decode.c
+++ b/src/demux/anim_decode.c
@@ -79,8 +79,8 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
  assert(dec_options != NULL);
  mode = dec_options->color_mode;
-  if (mode != MODE_RGBA && mode != MODE_BGRA &&
+  if (mode != MODE_RGBA && mode != MODE_BGRA && mode != MODE_rgbA &&
-      mode != MODE_rgbA && mode != MODE_bgrA) {
+      mode != MODE_bgrA) {
    return 0;
  }
  dec->blend_func = (mode == MODE_RGBA || mode == MODE_BGRA)
@@ -145,7 +145,7 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
  WebPAnimDecoderReset(dec);
  return dec;
- Error:
+Error:
  WebPAnimDecoderDelete(dec);
  return NULL;
 }
@@ -197,13 +197,13 @@ WEBP_NODISCARD static int CopyCanvas(const uint8_t* src, uint8_t* dst,
 // Returns true if the current frame is a key-frame.
 static int IsKeyFrame(const WebPIterator* const curr,
                      const WebPIterator* const prev,
-                      int prev_frame_was_key_frame,
+                      int prev_frame_was_key_frame, int canvas_width,
-                      int canvas_width, int canvas_height) {
+                      int canvas_height) {
  if (curr->frame_num == 1) {
    return 1;
  } else if ((!curr->has_alpha || curr->blend_method == WEBP_MUX_NO_BLEND) &&
-             IsFullFrame(curr->width, curr->height,
+             IsFullFrame(curr->width, curr->height, canvas_width,
-                         canvas_width, canvas_height)) {
+                         canvas_height)) {
    return 1;
  } else {
    return (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) &&
@@ -213,12 +213,11 @@ static int IsKeyFrame(const WebPIterator* const curr,
  }
 }
 // Blend a single channel of 'src' over 'dst', given their alpha channel values.
 // 'src' and 'dst' are assumed to be NOT pre-multiplied by alpha.
-static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
+static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, uint32_t dst,
-                                      uint32_t dst, uint8_t dst_a,
+                                      uint8_t dst_a, uint32_t scale,
-                                      uint32_t scale, int shift) {
+                                      int shift) {
  const uint8_t src_channel = (src >> shift) & 0xff;
  const uint8_t dst_channel = (dst >> shift) & 0xff;
  const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
@@ -328,8 +327,8 @@ static void FindBlendRangeAtRow(const WebPIterator* const src,
  }
 }
-int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
+int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, uint8_t** buf_ptr,
-                           uint8_t** buf_ptr, int* timestamp_ptr) {
+                           int* timestamp_ptr) {
  WebPIterator iter;
  uint32_t width;
  uint32_t height;
@@ -358,8 +357,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
      goto Error;
    }
  } else {
-    if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame,
+    if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame, width, height)) {
                    width, height)) {
      goto Error;
    }
  }
@@ -394,8 +392,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
      int y;
      // Blend transparent pixels with pixels in previous canvas.
      for (y = 0; y < iter.height; ++y) {
-        const size_t offset =
+        const size_t offset = (iter.y_offset + y) * width + iter.x_offset;
            (iter.y_offset + y) * width + iter.x_offset;
        blend_row((uint32_t*)dec->curr_frame + offset,
                  (uint32_t*)dec->prev_frame_disposed + offset, iter.width);
      }
@@ -445,7 +442,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
  *timestamp_ptr = timestamp;
  return 1;
- Error:
+Error:
  WebPDemuxReleaseIterator(&iter);
  return 0;
 }
--- a/src/demux/demux.c
+++ b/src/demux/demux.c
@@ -76,11 +76,7 @@ struct WebPDemuxer {
  Chunk** chunks_tail;
 };
-typedef enum {
+typedef enum { PARSE_OK, PARSE_NEED_MORE_DATA, PARSE_ERROR } ParseStatus;
  PARSE_OK,
  PARSE_NEED_MORE_DATA,
  PARSE_ERROR
 } ParseStatus;
 typedef struct ChunkParser {
  uint8_t id[4];
@@ -94,10 +90,10 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
 static int IsValidExtendedFormat(const WebPDemuxer* const dmux);
 static const ChunkParser kMasterChunks[] = {
-  { { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
+    {{'V', 'P', '8', ' '}, ParseSingleImage, IsValidSimpleFormat},
-  { { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
+    {{'V', 'P', '8', 'L'}, ParseSingleImage, IsValidSimpleFormat},
-  { { 'V', 'P', '8', 'X' }, ParseVP8X,        IsValidExtendedFormat },
+    {{'V', 'P', '8', 'X'}, ParseVP8X, IsValidExtendedFormat},
-  { { '0', '0', '0', '0' }, NULL,             NULL },
+    {{'0', '0', '0', '0'}, NULL, NULL},
 };
 //------------------------------------------------------------------------------
@@ -109,8 +105,8 @@ int WebPGetDemuxVersion(void) {
 // -----------------------------------------------------------------------------
 // MemBuffer
-static int RemapMemBuffer(MemBuffer* const mem,
+static int RemapMemBuffer(MemBuffer* const mem, const uint8_t* data,
-                          const uint8_t* data, size_t size) {
+                          size_t size) {
  if (size < mem->buf_size) return 0;  // can't remap to a shorter buffer!
  mem->buf = data;
@@ -118,8 +114,8 @@ static int RemapMemBuffer(MemBuffer* const mem,
  return 1;
 }
-static int InitMemBuffer(MemBuffer* const mem,
+static int InitMemBuffer(MemBuffer* const mem, const uint8_t* data,
-                         const uint8_t* data, size_t size) {
+                         size_t size) {
  memset(mem, 0, sizeof(*mem));
  return RemapMemBuffer(mem, data, size);
 }
@@ -195,8 +191,8 @@ static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
  return 1;
 }
-static void SetFrameInfo(size_t start_offset, size_t size,
+static void SetFrameInfo(size_t start_offset, size_t size, int frame_num,
-                         int frame_num, int complete,
+                         int complete,
                         const WebPBitstreamFeatures* const features,
                         Frame* const frame) {
  frame->img_components[0].offset = start_offset;
@@ -214,8 +210,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
                              MemBuffer* const mem, Frame* const frame) {
  int alpha_chunks = 0;
  int image_chunks = 0;
-  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
+  int done =
-              MemDataSize(mem) < min_size);
+      (MemDataSize(mem) < CHUNK_HEADER_SIZE || MemDataSize(mem) < min_size);
  ParseStatus status = PARSE_OK;
  if (done) return PARSE_NEED_MORE_DATA;
@@ -232,7 +228,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
    payload_size_padded = payload_size + (payload_size & 1);
    payload_available = (payload_size_padded > MemDataSize(mem))
-                      ? MemDataSize(mem) : payload_size_padded;
+                            ? MemDataSize(mem)
                            : payload_size_padded;
    chunk_size = CHUNK_HEADER_SIZE + payload_available;
    if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
    if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
@@ -258,9 +255,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
          // Extract the bitstream features, tolerating failures when the data
          // is incomplete.
          WebPBitstreamFeatures features;
-          const VP8StatusCode vp8_status =
+          const VP8StatusCode vp8_status = WebPGetFeatures(
-              WebPGetFeatures(mem->buf + chunk_start_offset, chunk_size,
+              mem->buf + chunk_start_offset, chunk_size, &features);
                              &features);
          if (status == PARSE_NEED_MORE_DATA &&
              vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
            return PARSE_NEED_MORE_DATA;
@@ -298,9 +294,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
 // enough data ('min_size') to parse the payload.
 // Returns PARSE_OK on success with *frame pointing to the new Frame.
 // Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
-static ParseStatus NewFrame(const MemBuffer* const mem,
+static ParseStatus NewFrame(const MemBuffer* const mem, uint32_t min_size,
-                            uint32_t min_size, uint32_t actual_size,
+                            uint32_t actual_size, Frame** frame) {
                            Frame** frame) {
  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
  if (actual_size < min_size) return PARSE_ERROR;
  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
@@ -311,8 +306,8 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
 // Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
 // 'frame_chunk_size' is the previously validated, padded chunk size.
-static ParseStatus ParseAnimationFrame(
+static ParseStatus ParseAnimationFrame(WebPDemuxer* const dmux,
-    WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
+                                       uint32_t frame_chunk_size) {
  const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
  const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
  int added_frame = 0;
@@ -320,8 +315,7 @@ static ParseStatus ParseAnimationFrame(
  MemBuffer* const mem = &dmux->mem;
  Frame* frame;
  size_t start_offset;
-  ParseStatus status =
+  ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
      NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
  if (status != PARSE_OK) return status;
  frame->x_offset = 2 * ReadLE24s(mem);
@@ -362,8 +356,8 @@ static ParseStatus ParseAnimationFrame(
 // the user to request the payload via a fourcc string. 'size' includes the
 // header and the unpadded payload size.
 // Returns true on success, false otherwise.
-static int StoreChunk(WebPDemuxer* const dmux,
+static int StoreChunk(WebPDemuxer* const dmux, size_t start_offset,
-                      size_t start_offset, uint32_t size) {
+                      uint32_t size) {
  Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
  if (chunk == NULL) return 0;
@@ -647,8 +641,8 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
      }
      if (f->width > 0 && f->height > 0 &&
-          !CheckFrameBounds(f, !is_animation,
+          !CheckFrameBounds(f, !is_animation, dmux->canvas_width,
-                            dmux->canvas_width, dmux->canvas_height)) {
+                            dmux->canvas_height)) {
        return 0;
      }
    }
@@ -788,12 +782,18 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
  if (dmux == NULL) return 0;
  switch (feature) {
-    case WEBP_FF_FORMAT_FLAGS:     return dmux->feature_flags;
+    case WEBP_FF_FORMAT_FLAGS:
-    case WEBP_FF_CANVAS_WIDTH:     return (uint32_t)dmux->canvas_width;
+      return dmux->feature_flags;
-    case WEBP_FF_CANVAS_HEIGHT:    return (uint32_t)dmux->canvas_height;
+    case WEBP_FF_CANVAS_WIDTH:
-    case WEBP_FF_LOOP_COUNT:       return (uint32_t)dmux->loop_count;
+      return (uint32_t)dmux->canvas_width;
-    case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor;
+    case WEBP_FF_CANVAS_HEIGHT:
-    case WEBP_FF_FRAME_COUNT:      return (uint32_t)dmux->num_frames;
+      return (uint32_t)dmux->canvas_height;
    case WEBP_FF_LOOP_COUNT:
      return (uint32_t)dmux->loop_count;
    case WEBP_FF_BACKGROUND_COLOR:
      return dmux->bgcolor;
    case WEBP_FF_FRAME_COUNT:
      return (uint32_t)dmux->num_frames;
  }
  return 0;
 }
@@ -822,8 +822,8 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
    // if alpha exists it precedes image, update the size allowing for
    // intervening chunks.
    if (alpha->size > 0) {
-      const size_t inter_size = (image->offset > 0)
+      const size_t inter_size =
-                              ? image->offset - (alpha->offset + alpha->size)
+          (image->offset > 0) ? image->offset - (alpha->offset + alpha->size)
                              : 0;
      start_offset = alpha->offset;
      *data_size += alpha->size + inter_size;
@@ -835,8 +835,7 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
 // Create a whole 'frame' from VP8 (+ alpha) or lossless.
 static int SynthesizeFrame(const WebPDemuxer* const dmux,
-                           const Frame* const frame,
+                           const Frame* const frame, WebPIterator* const iter) {
                           WebPIterator* const iter) {
  const uint8_t* const mem_buf = dmux->mem.buf;
  size_t payload_size = 0;
  const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
@@ -891,9 +890,7 @@ int WebPDemuxPrevFrame(WebPIterator* iter) {
  return SetFrame(iter->frame_num - 1, iter);
 }
-void WebPDemuxReleaseIterator(WebPIterator* iter) {
+void WebPDemuxReleaseIterator(WebPIterator* iter) { (void)iter; }
  (void)iter;
 }
 // -----------------------------------------------------------------------------
 // Chunk iteration
@@ -944,9 +941,8 @@ static int SetChunk(const char fourcc[4], int chunk_num,
  return 0;
 }
-int WebPDemuxGetChunk(const WebPDemuxer* dmux,
+int WebPDemuxGetChunk(const WebPDemuxer* dmux, const char fourcc[4],
-                      const char fourcc[4], int chunk_num,
+                      int chunk_num, WebPChunkIterator* iter) {
                      WebPChunkIterator* iter) {
  if (iter == NULL) return 0;
  memset(iter, 0, sizeof(*iter));
@@ -972,6 +968,4 @@ int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
  return 0;
 }
-void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
+void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) { (void)iter; }
  (void)iter;
 }
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -23,7 +23,6 @@
 #define USE_TABLES_FOR_ALPHA_MULT 0  // ALTERNATE_CODE
 #endif
 // -----------------------------------------------------------------------------
 #define MFIX 24  // 24bit fixed-point arithmetic
@@ -39,8 +38,8 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
 #if (USE_TABLES_FOR_ALPHA_MULT == 1)
 static const uint32_t kMultTables[2][256] = {
-  {    // (255u << MFIX) / alpha
+    // (255u << MFIX) / alpha
-    0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
+    {0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
     0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
     0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
     0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
@@ -82,9 +81,9 @@ static const uint32_t kMultTables[2][256] = {
     0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
     0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
     0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
-    0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
+     0x01030c30, 0x01020612, 0x01010204, 0x01000000},
-  {   // alpha * KINV_255
+    // alpha * KINV_255
-    0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
+    {0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
     0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
     0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
     0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
@@ -126,8 +125,7 @@ static const uint32_t kMultTables[2][256] = {
     0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
     0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
     0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
-    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
+     0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff}};
 };
 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
  return kMultTables[!inverse][a];
@@ -162,8 +160,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
 }
 void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-                   const uint8_t* WEBP_RESTRICT const alpha,
+                   const uint8_t* WEBP_RESTRICT const alpha, int width,
-                   int width, int inverse) {
+                   int inverse) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t a = alpha[x];
@@ -184,8 +182,8 @@ void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
 void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
 void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-                    const uint8_t* WEBP_RESTRICT const alpha,
+                    const uint8_t* WEBP_RESTRICT const alpha, int width,
-                    int width, int inverse);
+                    int inverse);
 //------------------------------------------------------------------------------
 // Generic per-plane calls
@@ -227,8 +225,8 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
 #endif
 #if !WEBP_NEON_OMIT_C_CODE
-static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
+static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first, int w, int h,
-                                 int w, int h, int stride) {
+                                 int stride) {
  while (h-- > 0) {
    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@@ -265,8 +263,8 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
  return (x * m) >> 16;
 }
-static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
+static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444, int w,
-                                                 int w, int h, int stride,
+                                                 int h, int stride,
                                                 int rg_byte_pos /* 0 or 1 */) {
  while (h-- > 0) {
    int i;
@@ -286,8 +284,8 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
 }
 #undef MULTIPLIER
-static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
+static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, int w, int h,
-                                     int w, int h, int stride) {
+                                     int stride) {
 #if (WEBP_SWAP_16BIT_CSP == 1)
  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
 #else
@@ -297,8 +295,8 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
 #if !WEBP_NEON_OMIT_C_CODE
 static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
-                           int width, int height,
+                           int width, int height, uint8_t* WEBP_RESTRICT dst,
-                           uint8_t* WEBP_RESTRICT dst, int dst_stride) {
+                           int dst_stride) {
  uint32_t alpha_mask = 0xff;
  int i, j;
@@ -330,8 +328,8 @@ static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha,
 }
 static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-                          int width, int height,
+                          int width, int height, uint8_t* WEBP_RESTRICT alpha,
-                          uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
+                          int alpha_stride) {
  uint8_t alpha_mask = 0xff;
  int i, j;
@@ -357,19 +355,22 @@ static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb,
 //------------------------------------------------------------------------------
 static int HasAlpha8b_C(const uint8_t* src, int length) {
-  while (length-- > 0) if (*src++ != 0xff) return 1;
+  while (length-- > 0)
    if (*src++ != 0xff) return 1;
  return 0;
 }
 static int HasAlpha32b_C(const uint8_t* src, int length) {
  int x;
-  for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
+  for (x = 0; length-- > 0; x += 4)
    if (src[x] != 0xff) return 1;
  return 0;
 }
 static void AlphaReplace_C(uint32_t* src, int length, uint32_t color) {
  int x;
-  for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color;
+  for (x = 0; x < length; ++x)
    if ((src[x] >> 24) == 0) src[x] = color;
 }
 //------------------------------------------------------------------------------
@@ -383,8 +384,8 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
 static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
                       const uint8_t* WEBP_RESTRICT r,
                       const uint8_t* WEBP_RESTRICT g,
-                       const uint8_t* WEBP_RESTRICT b,
+                       const uint8_t* WEBP_RESTRICT b, int len,
-                       int len, uint32_t* WEBP_RESTRICT out) {
+                       uint32_t* WEBP_RESTRICT out) {
  int i;
  for (i = 0; i < len; ++i) {
    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
@@ -394,8 +395,8 @@ static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
 static void PackRGB_C(const uint8_t* WEBP_RESTRICT r,
                      const uint8_t* WEBP_RESTRICT g,
-                      const uint8_t* WEBP_RESTRICT b,
+                      const uint8_t* WEBP_RESTRICT b, int len, int step,
-                      int len, int step, uint32_t* WEBP_RESTRICT out) {
+                      uint32_t* WEBP_RESTRICT out) {
  int i, offset = 0;
  for (i = 0; i < len; ++i) {
    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
@@ -419,8 +420,8 @@ void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
 #endif
 void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
                    const uint8_t* WEBP_RESTRICT g,
-                    const uint8_t* WEBP_RESTRICT b,
+                    const uint8_t* WEBP_RESTRICT b, int len, int step,
-                    int len, int step, uint32_t* WEBP_RESTRICT out);
+                    uint32_t* WEBP_RESTRICT out);
 int (*WebPHasAlpha8b)(const uint8_t* src, int length);
 int (*WebPHasAlpha32b)(const uint8_t* src, int length);
--- a/src/dsp/alpha_processing_mips_dsp_r2.c
+++ b/src/dsp/alpha_processing_mips_dsp_r2.c
@@ -17,8 +17,8 @@
 #if defined(WEBP_USE_MIPS_DSP_R2)
 static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
-                                   int width, int height,
+                                   int width, int height, uint8_t* dst,
-                                   uint8_t* dst, int dst_stride) {
+                                   int dst_stride) {
  uint32_t alpha_mask = 0xffffffff;
  int i, j, temp0;
@@ -28,7 +28,7 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
    for (i = 0; i < (width >> 2); ++i) {
      int temp1, temp2, temp3;
-      __asm__ volatile (
+      __asm__ volatile(
          "ulw    %[temp0],      0(%[palpha])                \n\t"
          "addiu  %[palpha],     %[palpha],     4            \n\t"
          "addiu  %[pdst],       %[pdst],       16           \n\t"
@@ -40,47 +40,43 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
          "sb     %[temp1],      -12(%[pdst])                \n\t"
          "sb     %[temp2],      -8(%[pdst])                 \n\t"
          "sb     %[temp3],      -4(%[pdst])                 \n\t"
-        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+          : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-          [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
+            [temp3] "=&r"(temp3), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
-          [alpha_mask]"+r"(alpha_mask)
+            [alpha_mask] "+r"(alpha_mask)
          :
-        : "memory"
+          : "memory");
      );
    }
    for (i = 0; i < (width & 3); ++i) {
-      __asm__ volatile (
+      __asm__ volatile(
          "lbu    %[temp0],      0(%[palpha])                \n\t"
          "addiu  %[palpha],     %[palpha],     1            \n\t"
          "sb     %[temp0],      0(%[pdst])                  \n\t"
          "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
          "addiu  %[pdst],       %[pdst],       4            \n\t"
-        : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
+          : [temp0] "=&r"(temp0), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
-          [alpha_mask]"+r"(alpha_mask)
+            [alpha_mask] "+r"(alpha_mask)
          :
-        : "memory"
+          : "memory");
      );
    }
    alpha += alpha_stride;
    dst += dst_stride;
  }
-  __asm__ volatile (
+  __asm__ volatile(
      "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
      "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
      "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
      "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
      "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
      "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
-    : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
+      : [temp0] "=&r"(temp0), [alpha_mask] "+r"(alpha_mask)
-    :
+      :);
  );
  return (alpha_mask != 0xff);
 }
-static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
+static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width, int inverse) {
                                  int inverse) {
  int x;
  const uint32_t c_00ffffff = 0x00ffffffu;
  const uint32_t c_ff000000 = 0xff000000u;
@@ -93,7 +89,7 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
        ptr[x] = 0;
      } else {
        int temp0, temp1, temp2, temp3, alpha;
-        __asm__ volatile (
+        __asm__ volatile(
            "srl          %[alpha],   %[argb],       24                \n\t"
            "replv.qb     %[temp0],   %[alpha]                         \n\t"
            "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
@@ -112,13 +108,12 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
            "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
            "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
            "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
-          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-            [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
+              [temp3] "=&r"(temp3), [alpha] "=&r"(alpha)
-          : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
+            : [inverse] "r"(inverse), [c_00ffffff] "r"(c_00ffffff),
-            [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
+              [c_8000000] "r"(c_8000000), [c_8000080] "r"(c_8000080),
-            [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
+              [c_ff000000] "r"(c_ff000000), [argb] "r"(argb)
-          : "memory", "hi", "lo"
+            : "memory", "hi", "lo");
        );
        ptr[x] = temp1;
      }
    }
@@ -133,7 +128,7 @@ static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
  const int rest = len & 1;
  const uint32_t* const loop_end = out + len - rest;
  const int step = 4;
-  __asm__ volatile (
+  __asm__ volatile(
      "xor          %[offset],   %[offset], %[offset]    \n\t"
      "beq          %[loop_end], %[out],    0f           \n\t"
      "2:                                                  \n\t"
@@ -159,12 +154,11 @@ static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
      "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
      "sw           %[temp0],    0(%[out])               \n\t"
      "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
+        [temp3] "=&r"(temp3), [offset] "=&r"(offset), [out] "+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
+        [loop_end] "r"(loop_end), [rest] "r"(rest)
-    : "memory"
+      : "memory");
  );
 }
 #endif  // WORDS_BIGENDIAN
@@ -175,7 +169,7 @@ static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
  const int rest = len & 1;
  const int a = 0xff;
  const uint32_t* const loop_end = out + len - rest;
-  __asm__ volatile (
+  __asm__ volatile(
      "xor          %[offset],   %[offset], %[offset]    \n\t"
      "beq          %[loop_end], %[out],    0f           \n\t"
      "2:                                                  \n\t"
@@ -199,12 +193,11 @@ static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
      "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
      "sw           %[temp0],    0(%[out])               \n\t"
      "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [offset]"=&r"(offset), [out]"+&r"(out)
+        [offset] "=&r"(offset), [out] "+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
+        [loop_end] "r"(loop_end), [rest] "r"(rest)
-    : "memory"
+      : "memory");
  );
 }
 //------------------------------------------------------------------------------
--- a/src/dsp/alpha_processing_neon.c
+++ b/src/dsp/alpha_processing_neon.c
@@ -22,7 +22,8 @@
 #define MULTIPLIER(a) ((a) * 0x8081)
 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
-#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do {                        \
+#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER)                   \
  do {                                                       \
    const uint8x8_t alpha = (V).val[(ALPHA)];                \
    const uint16x8_t r1 = vmull_u8((V).val[1], alpha);       \
    const uint16x8_t g1 = vmull_u8((V).val[2], alpha);       \
@@ -37,10 +38,10 @@
    (V).val[1] = vshrn_n_u16(r3, 8);                         \
    (V).val[2] = vshrn_n_u16(g3, 8);                         \
    (V).val[(OTHER)] = vshrn_n_u16(b3, 8);                   \
-} while (0)
+  } while (0)
-static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
+static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, int w,
-                                    int w, int h, int stride) {
+                                    int h, int stride) {
  const uint16x8_t kOne = vdupq_n_u16(1u);
  while (h-- > 0) {
    uint32_t* const rgbx = (uint32_t*)rgba;
--- a/src/dsp/alpha_processing_sse2.c
+++ b/src/dsp/alpha_processing_sse2.c
@@ -16,8 +16,8 @@
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
 #include "src/webp/types.h"
 #include "src/dsp/cpu.h"
 #include "src/webp/types.h"
 //------------------------------------------------------------------------------
@@ -204,7 +204,8 @@ static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
 // immediate in the _mm_shufflexx_epi16() instruction. We really need a macro.
 // We use: v / 255 = (v * 0x8081) >> 23, where v = alpha * {r,g,b} is a 16bit
 // value.
-#define APPLY_ALPHA(RGBX, SHUFFLE) do {                              \
+#define APPLY_ALPHA(RGBX, SHUFFLE)                                     \
  do {                                                                 \
    const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX));    \
    const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero);           \
    const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero);           \
@@ -223,10 +224,10 @@ static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
    const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7);                    \
    const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi);                 \
    _mm_storeu_si128((__m128i*)&(RGBX), A3);                           \
-} while (0)
+  } while (0)
-static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
+static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, int w,
-                                    int w, int h, int stride) {
+                                    int h, int stride) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i kMult = _mm_set1_epi16((short)0x8081);
  const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
@@ -273,7 +274,8 @@ static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
    const int mask = _mm_movemask_epi8(bits);
    if (mask != 0xffff) return 1;
  }
-  for (; i < length; ++i) if (src[i] != 0xff) return 1;
+  for (; i < length; ++i)
    if (src[i] != 0xff) return 1;
  return 0;
 }
@@ -312,7 +314,8 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
    const int mask = _mm_movemask_epi8(bits);
    if (mask != 0xffff) return 1;
  }
-  for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
+  for (; i <= length; i += 4)
    if (src[i] != 0xff) return 1;
  return 0;
 }
@@ -334,7 +337,8 @@ static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
    _mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0));
    _mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1));
  }
-  for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color;
+  for (; i < length; ++i)
    if ((src[i] >> 24) == 0) src[i] = color;
 }
 // -----------------------------------------------------------------------------
@@ -369,8 +373,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
 }
 static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
-                         const uint8_t* WEBP_RESTRICT const alpha,
+                         const uint8_t* WEBP_RESTRICT const alpha, int width,
-                         int width, int inverse) {
+                         int inverse) {
  int x = 0;
  if (!inverse) {
    const __m128i zero = _mm_setzero_si128();
--- a/src/dsp/alpha_processing_sse41.c
+++ b/src/dsp/alpha_processing_sse41.c
@@ -12,8 +12,8 @@
 // Author: Skal (pascal.massimino@gmail.com)
 #include "src/dsp/cpu.h"
 #include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/webp/types.h"
 #if defined(WEBP_USE_SSE41)
 #include <emmintrin.h>
@@ -35,14 +35,14 @@ static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
  // 'src[4 * width - 4]', because we don't know if alpha is the first or the
  // last byte of the quadruplet.
  const int limit = (width - 1) & ~15;
-  const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+  const __m128i kCstAlpha0 =
-                                          -1, -1, -1, -1, 12, 8, 4, 0);
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
-  const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+  const __m128i kCstAlpha1 =
-                                          12, 8, 4, 0, -1, -1, -1, -1);
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1);
-  const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
+  const __m128i kCstAlpha2 =
-                                          -1, -1, -1, -1, -1, -1, -1, -1);
+      _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-  const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
+  const __m128i kCstAlpha3 =
-                                          -1, -1, -1, -1, -1, -1, -1, -1);
+      _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
  for (j = 0; j < height; ++j) {
    const __m128i* src = (const __m128i*)argb;
    for (i = 0; i < limit; i += 16) {
--- a/src/dsp/common_sse41.h
+++ b/src/dsp/common_sse41.h
@@ -44,12 +44,12 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(
  // Process R.
  {
-    const __m128i shuff0 = _mm_set_epi8(
+    const __m128i shuff0 =
-        5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
+        _mm_set_epi8(5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
-    const __m128i shuff1 = _mm_set_epi8(
+    const __m128i shuff1 = _mm_set_epi8(-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7,
-        -1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
+                                        -1, -1, 6, -1, -1);
-    const __m128i shuff2 = _mm_set_epi8(
+    const __m128i shuff2 = _mm_set_epi8(-1, -1, 15, -1, -1, 14, -1, -1, 13, -1,
-     -1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
+                                        -1, 12, -1, -1, 11, -1);
    WEBP_SSE41_SHUFF(R, in0, in1)
  }
@@ -57,23 +57,23 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(
  {
    // Same as before, just shifted to the left by one and including the right
    // padding.
-    const __m128i shuff0 = _mm_set_epi8(
+    const __m128i shuff0 =
-        -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
+        _mm_set_epi8(-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
-    const __m128i shuff1 = _mm_set_epi8(
+    const __m128i shuff1 =
-        10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
+        _mm_set_epi8(10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
-    const __m128i shuff2 = _mm_set_epi8(
+    const __m128i shuff2 = _mm_set_epi8(-1, 15, -1, -1, 14, -1, -1, 13, -1, -1,
-     -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
+                                        12, -1, -1, 11, -1, -1);
    WEBP_SSE41_SHUFF(G, in2, in3)
  }
  // Process B.
  {
-    const __m128i shuff0 = _mm_set_epi8(
+    const __m128i shuff0 =
-        -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
+        _mm_set_epi8(-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
-    const __m128i shuff1 = _mm_set_epi8(
+    const __m128i shuff1 =
-        -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
+        _mm_set_epi8(-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
-    const __m128i shuff2 = _mm_set_epi8(
+    const __m128i shuff2 = _mm_set_epi8(15, -1, -1, 14, -1, -1, 13, -1, -1, 12,
-      15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
+                                        -1, -1, 11, -1, -1, 10);
    WEBP_SSE41_SHUFF(B, in4, in5)
  }
--- a/src/dsp/cost.c
+++ b/src/dsp/cost.c
@@ -14,42 +14,37 @@
 #include <stdlib.h>
 #include "src/dsp/cpu.h"
 #include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/webp/types.h"
 //------------------------------------------------------------------------------
 // Boolean-cost cost table
 const uint16_t VP8EntropyCost[256] = {
-  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
+    1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, 1178, 1152,
-  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
+    1110, 1076, 1061, 1024, 1024, 992,  968,  951,  939,  911,  896,  878,
-   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
+    871,  854,  838,  820,  811,  794,  786,  768,  768,  752,  740,  732,
-   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
+    720,  709,  704,  690,  683,  672,  666,  655,  647,  640,  631,  622,
-   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
+    615,  607,  598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
-   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
+    534,  528,  522,  512,  512,  504,  500,  494,  488,  483,  477,  473,
-   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
+    467,  461,  458,  452,  448,  443,  438,  434,  427,  424,  419,  415,
-   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
+    410,  406,  403,  399,  394,  390,  384,  384,  377,  374,  370,  366,
-   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
+    362,  359,  355,  351,  347,  342,  342,  336,  333,  330,  326,  323,
-   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
+    320,  316,  312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
-   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
+    280,  277,  274,  272,  268,  266,  262,  256,  256,  256,  251,  248,
-   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
+    245,  242,  240,  237,  234,  232,  228,  226,  223,  221,  218,  216,
-   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
+    214,  211,  208,  205,  203,  201,  198,  196,  192,  191,  188,  187,
-   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
+    183,  181,  179,  176,  175,  171,  171,  168,  165,  163,  160,  159,
-   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
+    156,  154,  152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
-   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
+    131,  128,  128,  125,  123,  121,  119,  117,  115,  113,  111,  110,
-   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
+    107,  105,  103,  102,  100,  98,   96,   94,   92,   91,   89,   86,
-   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
+    86,   83,   82,   80,   77,   76,   74,   73,   71,   69,   67,   66,
-   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
+    64,   63,   61,   59,   57,   55,   54,   52,   51,   49,   47,   46,
-   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
+    44,   43,   41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
-    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,   10,   9,
-    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
+    7,    6,    4,    3};
    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
    10,    9,    7,    6,    4,    3
 };
 //------------------------------------------------------------------------------
 // Level cost tables
@@ -57,263 +52,177 @@ const uint16_t VP8EntropyCost[256] = {
 // fixed costs for coding levels, deduce from the coding tree.
 // This is only the part that doesn't depend on the probability state.
 const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
-     0,  256,  256,  256,  256,  432,  618,  630,
+    0,    256,  256,  256,  256,  432,  618,  630,  731,  640,  640,  828,
-   731,  640,  640,  828,  901,  948, 1021, 1101,
+    901,  948,  1021, 1101, 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
-  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
+    1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, 1540, 1570, 1613, 1280,
-  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
+    1295, 1317, 1332, 1358, 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
-  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
+    1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, 1694, 1716, 1731, 1775,
-  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
+    1790, 1812, 1827, 1853, 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
-  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
+    1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832, 1838, 1847, 1853, 1878,
-  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
+    1884, 1893, 1899, 1910, 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
-  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
+    1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, 2065, 2074, 2080, 2100,
-  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
+    2106, 2115, 2121, 2132, 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
-  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
+    2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, 2289, 2298, 2304, 2168,
-  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
+    2174, 2183, 2189, 2200, 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
-  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
+    2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351, 2357, 2366, 2372, 2392,
-  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
+    2398, 2407, 2413, 2424, 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
-  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
+    2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573, 2579, 2588, 2594, 2619,
-  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
+    2625, 2634, 2640, 2651, 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
-  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
+    2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, 2578, 2587, 2593, 2613,
-  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
+    2619, 2628, 2634, 2645, 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
-  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
+    2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, 2802, 2811, 2817, 2840,
-  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
+    2846, 2855, 2861, 2872, 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
-  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
+    2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023, 3029, 3038, 3044, 3064,
-  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
+    3070, 3079, 3085, 3096, 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
-  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
+    3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086, 3092, 3101, 3107, 3132,
-  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
+    3138, 3147, 3153, 3164, 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
-  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
+    3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, 3319, 3328, 3334, 3354,
-  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
+    3360, 3369, 3375, 3386, 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
-  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
+    3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, 3543, 3552, 3558, 2816,
-  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
+    2822, 2831, 2837, 2848, 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
-  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
+    2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999, 3005, 3014, 3020, 3040,
-  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
+    3046, 3055, 3061, 3072, 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
-  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
+    3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221, 3227, 3236, 3242, 3267,
-  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
+    3273, 3282, 3288, 3299, 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
-  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
+    3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, 3295, 3304, 3310, 3330,
-  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
+    3336, 3345, 3351, 3362, 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
-  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
+    3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, 3519, 3528, 3534, 3557,
-  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
+    3563, 3572, 3578, 3589, 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
-  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
+    3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740, 3746, 3755, 3761, 3781,
-  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
+    3787, 3796, 3802, 3813, 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
-  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
+    3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, 3740, 3749, 3755, 3780,
-  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
+    3786, 3795, 3801, 3812, 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
-  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
+    3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, 3967, 3976, 3982, 4002,
-  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
+    4008, 4017, 4023, 4034, 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
-  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
+    4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, 4191, 4200, 4206, 4070,
-  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
+    4076, 4085, 4091, 4102, 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
-  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
+    4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253, 4259, 4268, 4274, 4294,
-  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
+    4300, 4309, 4315, 4326, 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
-  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
+    4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, 4481, 4490, 4496, 4521,
-  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
+    4527, 4536, 4542, 4553, 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
-  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
+    4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588,
-  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
+    3594, 3603, 3609, 3620, 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
-  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
+    3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815,
-  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
+    3821, 3830, 3836, 3847, 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
-  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
+    3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039,
-  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
+    4045, 4054, 4060, 4071, 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
-  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
+    3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107,
-  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
+    4113, 4122, 4128, 4139, 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
-  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+    4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329,
-  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
+    4335, 4344, 4350, 4361, 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
-  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
+    4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328,
-  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
+    4334, 4343, 4349, 4360, 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
-  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
+    4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552,
-  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
+    4558, 4567, 4573, 4584, 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
-  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
+    4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779,
-  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
+    4785, 4794, 4800, 4811, 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
-  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
+    4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842,
-  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
+    4848, 4857, 4863, 4874, 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
-  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
+    4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069,
-  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
+    5075, 5084, 5090, 5101, 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
-  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
+    5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293,
-  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
+    5299, 5308, 5314, 5325, 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
-  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
+    4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755,
-  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
+    4761, 4770, 4776, 4787, 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
-  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
+    4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+    4983, 4992, 4998, 5009, 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+    5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+    5051, 5060, 5066, 5077, 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+    5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+    5275, 5284, 5290, 5301, 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+    5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+    5502, 5511, 5517, 5528, 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+    5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+    5496, 5505, 5511, 5522, 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+    5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+    5723, 5732, 5738, 5749, 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+    5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+    5947, 5956, 5962, 5973, 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+    5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+    6015, 6024, 6030, 6041, 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+    6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+    6237, 6246, 6252, 6263, 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+    6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, 6420, 6429, 6435, 3515,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+    3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+    3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, 3704, 3713, 3719, 3739,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+    3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+    3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, 3926, 3935, 3941, 3966,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+    3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+    4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, 3994, 4003, 4009, 4029,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+    4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+    4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, 4218, 4227, 4233, 4256,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+    4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+    4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, 4445, 4454, 4460, 4480,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+    4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+    4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, 4439, 4448, 4454, 4479,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+    4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+    4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, 4666, 4675, 4681, 4701,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+    4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+    4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, 4890, 4899, 4905, 4769,
-  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+    4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
-  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+    4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, 4958, 4967, 4973, 4993,
-  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+    4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
-  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+    5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, 5180, 5189, 5195, 5220,
-  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+    5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
-  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+    5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, 4642, 4651, 4657, 4677,
-  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+    4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
-  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+    4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, 4866, 4875, 4881, 4904,
-  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+    4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
-  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+    5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, 5093, 5102, 5108, 5128,
-  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+    5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
-  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+    5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, 5156, 5165, 5171, 5196,
-  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+    5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
-  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+    5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, 5383, 5392, 5398, 5418,
-  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+    5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
-  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+    5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, 5607, 5616, 5622, 5417,
-  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+    5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
-  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+    5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, 5606, 5615, 5621, 5641,
-  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+    5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
-  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+    5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, 5828, 5837, 5843, 5868,
-  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+    5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
-  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+    5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, 5896, 5905, 5911, 5931,
-  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+    5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
-  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+    6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, 6120, 6129, 6135, 6158,
-  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+    6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
-  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+    6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, 6347, 6356, 6362, 6382,
-  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+    6388, 6397, 6403, 6414, 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
-  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+    5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408, 5414, 5423, 5429, 5454,
-  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+    5460, 5469, 5475, 5486, 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
-  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
+    5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635, 5641, 5650, 5656, 5676,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+    5682, 5691, 5697, 5708, 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+    5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, 5865, 5874, 5880, 5744,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+    5750, 5759, 5765, 5776, 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+    5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, 5933, 5942, 5948, 5968,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+    5974, 5983, 5989, 6000, 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+    6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149, 6155, 6164, 6170, 6195,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+    6201, 6210, 6216, 6227, 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+    6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148, 6154, 6163, 6169, 6189,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+    6195, 6204, 6210, 6221, 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+    6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, 6378, 6387, 6393, 6416,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+    6422, 6431, 6437, 6448, 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+    6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, 6605, 6614, 6620, 6640,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+    6646, 6655, 6661, 6672, 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+    6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662, 6668, 6677, 6683, 6708,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+    6714, 6723, 6729, 6740, 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+    6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, 6895, 6904, 6910, 6930,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+    6936, 6945, 6951, 6962, 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+    7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, 7119, 7128, 7134, 6392,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+    6398, 6407, 6413, 6424, 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+    6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, 6581, 6590, 6596, 6616,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+    6622, 6631, 6637, 6648, 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+    6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797, 6803, 6812, 6818, 6843,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+    6849, 6858, 6864, 6875, 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+    6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, 6871, 6880, 6886, 6906,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+    6912, 6921, 6927, 6938, 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+    7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, 7095, 7104, 7110, 7133,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+    7139, 7148, 7154, 7165, 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+    7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316, 7322, 7331, 7337, 7357,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+    7363, 7372, 7378, 7389, 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+    7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310, 7316, 7325, 7331, 7356,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+    7362, 7371, 7377, 7388, 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+    7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, 7543, 7552, 7558, 7578,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+    7584, 7593, 7599, 7610, 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+    7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761};
  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
 };
 //------------------------------------------------------------------------------
 // Tables for level coding
--- a/src/dsp/cost_mips32.c
+++ b/src/dsp/cost_mips32.c
@@ -38,7 +38,7 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }
-  __asm__ volatile (
+  __asm__ volatile(
      ".set    push                                                     \n\t"
      ".set    noreorder                                                \n\t"
      "subu    %[temp1],      %[res_last],        %[n]                  \n\t"
@@ -72,14 +72,14 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
      " lw     %[t],          0(%[t])                                   \n\t"
      "2:                                                               \n\t"
      ".set    pop                                                      \n\t"
-    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
+      : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
-      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
+        [ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
-      [temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
+        [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
-    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
+        [res_coeffs] "+&r"(res_coeffs)
-      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
+      : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
-      [inc_p_costs]"r"(inc_p_costs)
+        [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
-    : "memory"
+        [inc_p_costs] "r"(inc_p_costs)
-  );
+      : "memory");
  // Last coefficient is always non-zero
  {
@@ -102,7 +102,7 @@ static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
  int temp0, temp1, temp2, n, n1;
  assert(res->first == 0 || coeffs[0] == 0);
-  __asm__ volatile (
+  __asm__ volatile(
      ".set     push                                      \n\t"
      ".set     noreorder                                 \n\t"
      "addiu    %[p_coeffs],   %[p_coeffs],    28         \n\t"
@@ -127,12 +127,10 @@ static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
      " addiu   %[p_coeffs],   %[p_coeffs],    -4         \n\t"
      "2:                                                   \n\t"
      ".set     pop                                       \n\t"
-    : [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
+      : [p_coeffs] "+&r"(p_coeffs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
-      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+        [temp2] "=&r"(temp2), [n] "=&r"(n), [n1] "=&r"(n1)
      [n]"=&r"(n), [n1]"=&r"(n1)
      :
-    : "memory"
+      : "memory");
  );
  res->last = temp2;
  res->coeffs = coeffs;
 }
--- a/src/dsp/cost_mips_dsp_r2.c
+++ b/src/dsp/cost_mips_dsp_r2.c
@@ -38,13 +38,14 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }
-  __asm__ volatile (
+  __asm__ volatile(
      ".set      push                                                     \n\t"
      ".set      noreorder                                                \n\t"
      "subu      %[temp1],        %[res_last],        %[n]                \n\t"
      "blez      %[temp1],        2f                                      \n\t"
      " nop                                                               \n\t"
-  "1:                                                                   \n\t"
+      "1:                                                                   "
      "\n\t"
      "sll       %[temp0],        %[n],               1                   \n\t"
      "lhx       %[v_reg],        %[temp0](%[res_coeffs])                 \n\t"
      "addiu     %[n],            %[n],               1                   \n\t"
@@ -65,16 +66,16 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
      "addu      %[cost],         %[cost],            %[temp0]            \n\t"
      "bne       %[n],            %[res_last],        1b                  \n\t"
      " lw       %[t],            0(%[t])                                 \n\t"
-  "2:                                                                   \n\t"
+      "2:                                                                   "
      "\n\t"
      ".set      pop                                                      \n\t"
-    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
+      : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
-      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
+        [ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
-      [temp1]"=&r"(temp1)
+        [temp0] "=&r"(temp0), [temp1] "=&r"(temp1)
-    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
+      : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
-      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
+        [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
-      [res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs)
+        [res_coeffs] "r"(res_coeffs), [inc_p_costs] "r"(inc_p_costs)
-    : "memory"
+      : "memory");
  );
  // Last coefficient is always non-zero
  {
--- a/src/dsp/cost_neon.c
+++ b/src/dsp/cost_neon.c
@@ -16,8 +16,8 @@
 #include "src/dsp/neon.h"
 #include "src/enc/cost_enc.h"
-static const uint8_t position[16] = { 1, 2,  3,  4,  5,  6,  7,  8,
+static const uint8_t position[16] = {1, 2,  3,  4,  5,  6,  7,  8,
-                                      9, 10, 11, 12, 13, 14, 15, 16 };
+                                     9, 10, 11, 12, 13, 14, 15, 16};
 static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
                                   VP8Residual* WEBP_RESTRICT const res) {
--- a/src/dsp/cost_sse2.c
+++ b/src/dsp/cost_sse2.c
@@ -14,15 +14,14 @@
 #include "src/dsp/dsp.h"
 #if defined(WEBP_USE_SSE2)
 #include <assert.h>
 #include <emmintrin.h>
 #include <assert.h>
 #include "src/webp/types.h"
 #include "src/dsp/cpu.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"
 #include "src/webp/types.h"
 //------------------------------------------------------------------------------
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -33,18 +33,19 @@
 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
-  __asm__ volatile (
+  __asm__ volatile(
      "mov %%ebx, %%edi\n"
      "cpuid\n"
      "xchg %%edi, %%ebx\n"
-    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+      : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
        "=d"(cpu_info[3])
      : "a"(info_type), "c"(0));
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
-  __asm__ volatile (
+  __asm__ volatile("cpuid\n"
-    "cpuid\n"
+                   : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
-    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+                     "=d"(cpu_info[3])
                   : "a"(info_type), "c"(0));
 }
 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
@@ -67,13 +68,13 @@ static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
-  __asm__ volatile (
+  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
-    ".byte 0x0f, 0x01, 0xd0\n"
+                   : "=a"(eax), "=d"(edx)
-    : "=a"(eax), "=d"(edx) : "c" (ecx));
+                   : "c"(ecx));
  return ((uint64_t)edx << 32) | eax;
 }
-#elif (defined(_M_X64) || defined(_M_IX86)) && \
+#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
-      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+    _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
 #include <immintrin.h>
 #define xgetbv() _xgetbv(0)
 #elif defined(_MSC_VER) && defined(_M_IX86)
@@ -241,7 +242,6 @@ static int mipsCPUInfo(CPUFeature feature) {
  } else {
    return 0;
  }
 }
 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
--- a/src/dsp/cpu.h
+++ b/src/dsp/cpu.h
@@ -192,6 +192,7 @@
 #if defined(WEBP_USE_THREAD) && !defined(_WIN32)
 #include <pthread.h>  // NOLINT
 // clang-format off
 #define WEBP_DSP_INIT(func)                                         \
  do {                                                              \
    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
@@ -202,7 +203,9 @@
    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
    (void)pthread_mutex_unlock(&func##_lock);                       \
  } while (0)
 // clang-format on
 #else   // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
 // clang-format off
 #define WEBP_DSP_INIT(func)                               \
  do {                                                    \
    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
@@ -211,6 +214,7 @@
    func();                                               \
    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
  } while (0)
 // clang-format on
 #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
 // Defines an Init + helper function that control multiple initialization of
--- a/src/dsp/dec.c
+++ b/src/dsp/dec.c
@@ -34,13 +34,14 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
-#define STORE2(y, dc, d, c) do {    \
+#define STORE2(y, dc, d, c) \
  do {                      \
    const int DC = (dc);    \
    STORE(0, y, DC + (d));  \
    STORE(1, y, DC + (c));  \
    STORE(2, y, DC - (c));  \
    STORE(3, y, DC - (d));  \
-} while (0)
+  } while (0)
 #if !WEBP_NEON_OMIT_C_CODE
 static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
@@ -150,8 +151,8 @@ static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
  int i;
  for (i = 0; i < 4; ++i) {
    const int a0 = in[0 + i] + in[12 + i];
-    const int a1 = in[4 + i] + in[ 8 + i];
+    const int a1 = in[4 + i] + in[8 + i];
-    const int a2 = in[4 + i] - in[ 8 + i];
+    const int a2 = in[4 + i] - in[8 + i];
    const int a3 = in[0 + i] - in[12 + i];
    tmp[0 + i] = a0 + a1;
    tmp[8 + i] = a0 - a1;
@@ -164,7 +165,7 @@ static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    const int a3 = dc - tmp[3 + i * 4];
-    out[ 0] = (a0 + a1) >> 3;
+    out[0] = (a0 + a1) >> 3;
    out[16] = (a3 + a2) >> 3;
    out[32] = (a0 - a1) >> 3;
    out[48] = (a3 - a2) >> 3;
@@ -268,9 +269,9 @@ static void VE4_C(uint8_t* dst) {    // vertical
  const uint8_t* top = dst - BPS;
  const uint8_t vals[4] = {
      AVG3(top[-1], top[0], top[1]),
-    AVG3(top[ 0], top[1], top[2]),
+      AVG3(top[0], top[1], top[2]),
-    AVG3(top[ 1], top[2], top[3]),
+      AVG3(top[1], top[2], top[3]),
-    AVG3(top[ 2], top[3], top[4])
+      AVG3(top[2], top[3], top[4]),
  };
  int i;
  for (i = 0; i < 4; ++i) {
@@ -393,8 +394,7 @@ static void HU4_C(uint8_t* dst) {   // Horizontal-Up
  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 static void HD4_C(uint8_t* dst) {  // Horizontal-Down
@@ -493,46 +493,46 @@ VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 // 4 pixels in, 2 pixels out
 static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  p[-step] = VP8kclip1[p0 + a2];
-  p[    0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
 }
 // 4 pixels in, 4 pixels out
 static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0);
  const int a1 = VP8ksclip2[(a + 4) >> 3];
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  const int a3 = (a1 + 1) >> 1;
-  p[-2*step] = VP8kclip1[p1 + a3];
+  p[-2 * step] = VP8kclip1[p1 + a3];
-  p[-  step] = VP8kclip1[p0 + a2];
+  p[-step] = VP8kclip1[p0 + a2];
-  p[      0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
-  p[   step] = VP8kclip1[q1 - a3];
+  p[step] = VP8kclip1[q1 - a3];
 }
 // 6 pixels in, 6 pixels out
 static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
-  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
+  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
-  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
  const int a3 = (9 * a + 63) >> 7;   // eq. to ((1 * a + 7) * 9) >> 7
-  p[-3*step] = VP8kclip1[p2 + a3];
+  p[-3 * step] = VP8kclip1[p2 + a3];
-  p[-2*step] = VP8kclip1[p1 + a2];
+  p[-2 * step] = VP8kclip1[p1 + a2];
-  p[-  step] = VP8kclip1[p0 + a1];
+  p[-step] = VP8kclip1[p0 + a1];
-  p[      0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
-  p[   step] = VP8kclip1[q1 - a2];
+  p[step] = VP8kclip1[q1 - a2];
-  p[ 2*step] = VP8kclip1[q2 - a3];
+  p[2 * step] = VP8kclip1[q2 - a3];
 }
 static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
@@ -545,8 +545,8 @@ static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
 #endif  // !WEBP_NEON_OMIT_C_CODE
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
+static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, int step, int t,
-                                      int step, int t, int it) {
+                                      int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
  const int p0 = p[-step], q0 = p[0];
  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -602,9 +602,8 @@ static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
 // Complex In-loop filtering (Paragraph 15.3)
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
+static WEBP_INLINE void FilterLoop26_C(uint8_t* p, int hstride, int vstride,
-                                       int hstride, int vstride, int size,
+                                       int size, int thresh, int ithresh,
                                       int thresh, int ithresh,
                                       int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
@@ -619,9 +618,8 @@ static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
  }
 }
-static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
+static WEBP_INLINE void FilterLoop24_C(uint8_t* p, int hstride, int vstride,
-                                       int hstride, int vstride, int size,
+                                       int size, int thresh, int ithresh,
                                       int thresh, int ithresh,
                                       int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
@@ -639,19 +637,19 @@ static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
 #if !WEBP_NEON_OMIT_C_CODE
 // on macroblock edges
-static void VFilter16_C(uint8_t* p, int stride,
+static void VFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
-                        int thresh, int ithresh, int hev_thresh) {
+                        int hev_thresh) {
  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }
-static void HFilter16_C(uint8_t* p, int stride,
+static void HFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
-                        int thresh, int ithresh, int hev_thresh) {
+                        int hev_thresh) {
  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }
 // on three inner edges
-static void VFilter16i_C(uint8_t* p, int stride,
+static void VFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
-                         int thresh, int ithresh, int hev_thresh) {
+                         int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
@@ -661,8 +659,8 @@ static void VFilter16i_C(uint8_t* p, int stride,
 #endif  // !WEBP_NEON_OMIT_C_CODE
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static void HFilter16i_C(uint8_t* p, int stride,
+static void HFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
-                         int thresh, int ithresh, int hev_thresh) {
+                         int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
--- a/src/dsp/dec_clip_tables.c
+++ b/src/dsp/dec_clip_tables.c
@@ -12,8 +12,8 @@
 // Author: Skal (pascal.massimino@gmail.com)
 #include "src/dsp/cpu.h"
 #include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/webp/types.h"
 // define to 0 to have run-time table initialization
 #if !defined(USE_STATIC_TABLES)
@@ -65,8 +65,7 @@ static const uint8_t abs0[255 + 255 + 1] = {
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
    0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
    0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+    0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
 };
 static const uint8_t sclip1[1020 + 1020 + 1] = {
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
@@ -238,8 +237,7 @@ static const uint8_t sclip1[1020 + 1020 + 1] = {
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f};
 };
 static const uint8_t sclip2[112 + 112 + 1] = {
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
@@ -260,8 +258,7 @@ static const uint8_t sclip2[112 + 112 + 1] = {
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
 };
 static const uint8_t clip1[255 + 511 + 1] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -327,8 +324,7 @@ static const uint8_t clip1[255 + 511 + 1] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 };
 #else
--- a/src/dsp/dec_mips32.c
+++ b/src/dsp/dec_mips32.c
@@ -33,7 +33,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
  const int a1 = VP8ksclip2[(a + 4) >> 3];
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  p[-step] = VP8kclip1[p0 + a2];
-  p[    0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
 }
 // 4 pixels in, 4 pixels out
@@ -44,9 +44,9 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  const int a3 = (a1 + 1) >> 1;
  p[-2 * step] = VP8kclip1[p1 + a3];
-  p[-    step] = VP8kclip1[p0 + a2];
+  p[-step] = VP8kclip1[p0 + a2];
-  p[        0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
-  p[     step] = VP8kclip1[q1 - a3];
+  p[step] = VP8kclip1[q1 - a3];
 }
 // 6 pixels in, 6 pixels out
@@ -60,10 +60,10 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
  const int a3 = (9 * a + 63) >> 7;   // eq. to ((1 * a + 7) * 9) >> 7
  p[-3 * step] = VP8kclip1[p2 + a3];
  p[-2 * step] = VP8kclip1[p1 + a2];
-  p[-    step] = VP8kclip1[p0 + a1];
+  p[-step] = VP8kclip1[p0 + a1];
-  p[        0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
-  p[     step] = VP8kclip1[q1 - a2];
+  p[step] = VP8kclip1[q1 - a2];
-  p[ 2 * step] = VP8kclip1[q2 - a3];
+  p[2 * step] = VP8kclip1[q2 - a3];
 }
 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
@@ -76,8 +76,8 @@ static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
  return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
 }
-static WEBP_INLINE int needs_filter2(const uint8_t* p,
+static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t,
-                                     int step, int t, int it) {
+                                     int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step];
  const int p1 = p[-2 * step], p0 = p[-step];
  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -89,9 +89,9 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
 }
-static WEBP_INLINE void FilterLoop26(uint8_t* p,
+static WEBP_INLINE void FilterLoop26(uint8_t* p, int hstride, int vstride,
-                                     int hstride, int vstride, int size,
+                                     int size, int thresh, int ithresh,
-                                     int thresh, int ithresh, int hev_thresh) {
+                                     int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -105,9 +105,9 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
  }
 }
-static WEBP_INLINE void FilterLoop24(uint8_t* p,
+static WEBP_INLINE void FilterLoop24(uint8_t* p, int hstride, int vstride,
-                                     int hstride, int vstride, int size,
+                                     int size, int thresh, int ithresh,
-                                     int thresh, int ithresh, int hev_thresh) {
+                                     int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -122,13 +122,13 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p,
 }
 // on macroblock edges
-static void VFilter16(uint8_t* p, int stride,
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh,
-                      int thresh, int ithresh, int hev_thresh) {
+                      int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }
-static void HFilter16(uint8_t* p, int stride,
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh,
-                      int thresh, int ithresh, int hev_thresh) {
+                      int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }
@@ -158,8 +158,8 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }
 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
-                       int thresh, int ithresh, int hev_thresh) {
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
@@ -167,8 +167,8 @@ static void VFilter16i(uint8_t* p, int stride,
  }
 }
-static void HFilter16i(uint8_t* p, int stride,
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
-                       int thresh, int ithresh, int hev_thresh) {
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
--- a/src/dsp/dec_mips_dsp_r2.c
+++ b/src/dsp/dec_mips_dsp_r2.c
@@ -25,7 +25,7 @@ static void TransformDC(const int16_t* WEBP_RESTRICT in,
                        uint8_t* WEBP_RESTRICT dst) {
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,
                        0, 0, 0, 0,
                        0, 1, 2, 3,
@@ -56,7 +56,7 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
-  __asm__ volatile (
+  __asm__ volatile(
    "ins              %[c4],      %[d4],     16,       16    \n\t"
    "replv.ph         %[temp1],   %[a]                       \n\t"
    "replv.ph         %[temp4],   %[d1]                      \n\t"
@@ -88,7 +88,7 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp1],   0(%[in])                 \n\t"
    "ulw              %[temp2],   16(%[in])                \n\t"
    LOAD_IN_X2(temp5, temp6, 24, 26)
@@ -159,14 +159,14 @@ static void TransformTwo(const int16_t* WEBP_RESTRICT in,
  }
 }
-static WEBP_INLINE void FilterLoop26(uint8_t* p,
+static WEBP_INLINE void FilterLoop26(uint8_t* p, int hstride, int vstride,
-                                     int hstride, int vstride, int size,
+                                     int size, int thresh, int ithresh,
-                                     int thresh, int ithresh, int hev_thresh) {
+                                     int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15;
-  __asm__ volatile (
+  __asm__ volatile(
      ".set      push                                      \n\t"
      ".set      noreorder                                 \n\t"
      "1:                                                  \n\t"
@@ -284,29 +284,28 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
      "bgtz      %[size],   1b                             \n\t"
      " addu     %[p],      %[p],           %[vstride]     \n\t"
      ".set      pop                                       \n\t"
-    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),[temp3]"=&r"(temp3),
+      : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
-      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+        [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6),
-      [temp7]"=&r"(temp7),[temp8]"=&r"(temp8),[temp9]"=&r"(temp9),
+        [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [temp9] "=&r"(temp9),
-      [temp10]"=&r"(temp10),[temp11]"=&r"(temp11),[temp12]"=&r"(temp12),
+        [temp10] "=&r"(temp10), [temp11] "=&r"(temp11), [temp12] "=&r"(temp12),
-      [temp13]"=&r"(temp13),[temp14]"=&r"(temp14),[temp15]"=&r"(temp15),
+        [temp13] "=&r"(temp13), [temp14] "=&r"(temp14), [temp15] "=&r"(temp15),
-      [size]"+&r"(size), [p]"+&r"(p)
+        [size] "+&r"(size), [p] "+&r"(p)
-    : [hstride]"r"(hstride), [thresh2]"r"(thresh2),
+      : [hstride] "r"(hstride), [thresh2] "r"(thresh2), [ithresh] "r"(ithresh),
-      [ithresh]"r"(ithresh),[vstride]"r"(vstride), [hev_thresh]"r"(hev_thresh),
+        [vstride] "r"(vstride), [hev_thresh] "r"(hev_thresh),
-      [VP8kclip1]"r"(VP8kclip1)
+        [VP8kclip1] "r"(VP8kclip1)
-    : "memory"
+      : "memory");
  );
 }
-static WEBP_INLINE void FilterLoop24(uint8_t* p,
+static WEBP_INLINE void FilterLoop24(uint8_t* p, int hstride, int vstride,
-                                     int hstride, int vstride, int size,
+                                     int size, int thresh, int ithresh,
-                                     int thresh, int ithresh, int hev_thresh) {
+                                     int hev_thresh) {
  int p0, q0, p1, q1, p2, q2, p3, q3;
  int step1, step2, temp1, temp2, temp3, temp4;
  uint8_t* pTemp0;
  uint8_t* pTemp1;
  const int thresh2 = 2 * thresh + 1;
-  __asm__ volatile (
+  __asm__ volatile(
      ".set      push                                   \n\t"
      ".set      noreorder                              \n\t"
      "bltz      %[size],    3f                         \n\t"
@@ -413,27 +412,26 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p,
      " addu     %[p],       %[p],          %[vstride]  \n\t"
      "3:                                               \n\t"
      ".set      pop                                    \n\t"
-    : [p0]"=&r"(p0), [q0]"=&r"(q0), [p1]"=&r"(p1), [q1]"=&r"(q1),
+      : [p0] "=&r"(p0), [q0] "=&r"(q0), [p1] "=&r"(p1), [q1] "=&r"(q1),
-      [p2]"=&r"(p2), [q2]"=&r"(q2), [p3]"=&r"(p3), [q3]"=&r"(q3),
+        [p2] "=&r"(p2), [q2] "=&r"(q2), [p3] "=&r"(p3), [q3] "=&r"(q3),
-      [step2]"=&r"(step2), [step1]"=&r"(step1), [temp1]"=&r"(temp1),
+        [step2] "=&r"(step2), [step1] "=&r"(step1), [temp1] "=&r"(temp1),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+        [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
-      [pTemp0]"=&r"(pTemp0), [pTemp1]"=&r"(pTemp1), [p]"+&r"(p),
+        [pTemp0] "=&r"(pTemp0), [pTemp1] "=&r"(pTemp1), [p] "+&r"(p),
-      [size]"+&r"(size)
+        [size] "+&r"(size)
-    : [vstride]"r"(vstride), [ithresh]"r"(ithresh),
+      : [vstride] "r"(vstride), [ithresh] "r"(ithresh),
-      [hev_thresh]"r"(hev_thresh), [hstride]"r"(hstride),
+        [hev_thresh] "r"(hev_thresh), [hstride] "r"(hstride),
-      [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+        [VP8kclip1] "r"(VP8kclip1), [thresh2] "r"(thresh2)
-    : "memory"
+      : "memory");
  );
 }
 // on macroblock edges
-static void VFilter16(uint8_t* p, int stride,
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh,
-                      int thresh, int ithresh, int hev_thresh) {
+                      int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }
-static void HFilter16(uint8_t* p, int stride,
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh,
-                      int thresh, int ithresh, int hev_thresh) {
+                      int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }
@@ -451,8 +449,8 @@ static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }
 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
-                       int thresh, int ithresh, int hev_thresh) {
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
@@ -460,8 +458,8 @@ static void VFilter16i(uint8_t* p, int stride,
  }
 }
-static void HFilter16i(uint8_t* p, int stride,
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
-                       int thresh, int ithresh, int hev_thresh) {
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
@@ -489,7 +487,7 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  const int thresh2 = 2 * thresh + 1;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  uint8_t* p1 = p - stride;
-  __asm__ volatile (
+  __asm__ volatile(
      ".set      push                                      \n\t"
      ".set      noreorder                                 \n\t"
      "li        %[i],        16                           \n\t"
@@ -533,31 +531,32 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
      "bgtz      %[i],        0b                           \n\t"
      " addiu    %[p],        %[p],           1            \n\t"
      " .set     pop                                       \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
-      [p]"+&r"(p), [i]"=&r"(i), [p1]"+&r"(p1)
+        [p] "+&r"(p), [i] "=&r"(i), [p1] "+&r"(p1)
-    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+      : [stride] "r"(stride), [VP8kclip1] "r"(VP8kclip1), [thresh2] "r"(thresh2)
-    : "memory"
+      : "memory");
  );
 }
 // TEMP0 = SRC[A + A1 * BPS]
 // TEMP1 = SRC[B + B1 * BPS]
 // TEMP2 = SRC[C + C1 * BPS]
 // TEMP3 = SRC[D + D1 * BPS]
 // clang-format off
 #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3,                               \
                     A, A1, B, B1, C, C1, D, D1, SRC)                          \
  "lbu      %[" #TEMP0 "],   " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
  "lbu      %[" #TEMP1 "],   " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
  "lbu      %[" #TEMP2 "],   " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
-  "lbu      %[" #TEMP3 "],   " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
+  "lbu      %[" #TEMP3 "],   " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"
 // clang-format on
 static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
-  __asm__ volatile (
+  __asm__ volatile(
    ".set      push                                     \n\t"
    ".set      noreorder                                \n\t"
    "li        %[i],       16                           \n\t"
@@ -622,14 +621,16 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
 // DST[A * BPS]     = TEMP0
 // DST[B + C * BPS] = TEMP1
 // clang-format off
 #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST)                              \
  "usw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #DST "])         \n\t"     \
  "usw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #DST "])  \n\t"
 // clang-format on
 static void VE4(uint8_t* dst) {  // vertical
  const uint8_t* top = dst - BPS;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
-  __asm__ volatile (
+  __asm__ volatile(
      "ulw             %[temp0],   -1(%[top])              \n\t"
      "ulh             %[temp1],   3(%[top])               \n\t"
      "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
@@ -645,20 +646,19 @@ static void VE4(uint8_t* dst) {    // vertical
      "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
      "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"
      "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
-    "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t"
+      "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t"  //
-    STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)
+      STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)                   //
      STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6)
+        [temp6] "=&r"(temp6)
-    : [top]"r"(top), [dst]"r"(dst)
+      : [top] "r"(top), [dst] "r"(dst)
-    : "memory"
+      : "memory");
  );
 }
 static void DC4(uint8_t* dst) {  // DC
  int temp0, temp1, temp2, temp3, temp4;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw          %[temp0],   -1*" XSTR(BPS) "(%[dst]) \n\t"
    LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)
    "ins          %[temp1],   %[temp2],    8,     8    \n\t"
@@ -668,8 +668,8 @@ static void DC4(uint8_t* dst) {   // DC
    "raddu.w.qb   %[temp1],   %[temp1]                 \n\t"
    "addu         %[temp0],   %[temp0],    %[temp1]    \n\t"
    "shra_r.w     %[temp0],   %[temp0],    3           \n\t"
-    "replv.qb     %[temp0],   %[temp0]                 \n\t"
+    "replv.qb     %[temp0],   %[temp0]                 \n\t" //
-    STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)  //
    STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
@@ -681,9 +681,9 @@ static void DC4(uint8_t* dst) {   // DC
 static void RD4(uint8_t* dst) {  // Down-right
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)
-    "ulw            %[temp7],   -1-" XSTR(BPS) "(%[dst])       \n\t"
+    "ulw            %[temp7],   -1-" XSTR(BPS) "(%[dst])       \n\t"  //
    "ins            %[temp1],   %[temp0], 16, 16               \n\t"
    "preceu.ph.qbr  %[temp5],   %[temp7]                       \n\t"
    "ins            %[temp2],   %[temp1], 16, 16               \n\t"
@@ -702,17 +702,17 @@ static void RD4(uint8_t* dst) {   // Down-right
    "shll.ph        %[temp0],   %[temp0], 1                    \n\t"
    "shra_r.ph      %[temp1],   %[temp1], 2                    \n\t"
    "addq.ph        %[temp8],   %[temp0], %[temp8]             \n\t"
-    "lbu            %[temp5],   3-" XSTR(BPS) "(%[dst])        \n\t"
+    "lbu            %[temp5],   3-" XSTR(BPS) "(%[dst])        \n\t"  //
    "precrq.ph.w    %[temp7],   %[temp7], %[temp7]             \n\t"
    "shra_r.ph      %[temp8],   %[temp8], 2                    \n\t"
    "ins            %[temp7],   %[temp5], 0,  8                \n\t"
    "precr.qb.ph    %[temp2],   %[temp1], %[temp3]             \n\t"
    "raddu.w.qb     %[temp4],   %[temp7]                       \n\t"
    "precr.qb.ph    %[temp6],   %[temp8], %[temp1]             \n\t"
-    "shra_r.w       %[temp4],   %[temp4], 2                    \n\t"
+    "shra_r.w       %[temp4],   %[temp4], 2                    \n\t" //
    STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)
    "prepend        %[temp2],   %[temp8], 8                    \n\t"
-    "prepend        %[temp6],   %[temp4], 8                    \n\t"
+    "prepend        %[temp6],   %[temp4], 8                    \n\t" //
    STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
@@ -724,14 +724,16 @@ static void RD4(uint8_t* dst) {   // Down-right
 // TEMP0 = SRC[A * BPS]
 // TEMP1 = SRC[B + C * BPS]
 // clang-format off
 #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC)                               \
  "ulw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #SRC "])         \n\t"     \
  "ulw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "])  \n\t"
 // clang-format on
 static void LD4(uint8_t* dst) {  // Down-Left
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
    "preceu.ph.qbl   %[temp2],    %[temp0]                     \n\t"
    "preceu.ph.qbr   %[temp3],    %[temp0]                     \n\t"
@@ -778,7 +780,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
 static void DC8uv(uint8_t* dst) {  // DC
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
    LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)
@@ -814,7 +816,7 @@ static void DC8uv(uint8_t* dst) {     // DC
 static void DC8uvNoLeft(uint8_t* dst) {  // DC with no left samples
  int temp0, temp1;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"
    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"
@@ -838,7 +840,7 @@ static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
 static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
    LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)
    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"
@@ -870,6 +872,7 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
 #undef STORE_8_BYTES
 #undef LOAD_4_BYTES
 // clang-format off
 #define CLIPPING(SIZE)                                         \
  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t" \
  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t" \
@@ -894,11 +897,11 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t" \
 ".endif                                                  \n\t"
-
+#define CLIP_8B_TO_DST(DST, TOP, SIZE)                                 \
-#define CLIP_8B_TO_DST(DST, TOP, SIZE) do {                                    \
+  do {                                                                 \
    int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1];                    \
    int temp0, temp1, temp2, temp3;                                    \
-  __asm__ volatile (                                                           \
+    __asm__ volatile(                                                  \
    ".if " #SIZE " < 8                                     \n\t"       \
      "ulw             %[temp0],   0(%[top])               \n\t"       \
      "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"       \
@@ -924,9 +927,11 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
      : [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST))            \
      : "memory"                                                       \
    );                                                                 \
-} while (0)
+  } while (0)
 // clang-format on
-#define CLIP_TO_DST(DST, SIZE) do {                                            \
+#define CLIP_TO_DST(DST, SIZE)                        \
  do {                                                \
    int y;                                            \
    const uint8_t* top = (DST) - BPS;                 \
    const int top_1 = ((int)top[-1] << 16) + top[-1]; \
@@ -934,12 +939,10 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
      CLIP_8B_TO_DST((DST), top, (SIZE));             \
      (DST) += BPS;                                   \
    }                                                 \
-} while (0)
+  } while (0)
 #define TRUE_MOTION(DST, SIZE) \
-static void TrueMotion##SIZE(uint8_t* (DST)) {                                 \
+  static void TrueMotion##SIZE(uint8_t*(DST)) { CLIP_TO_DST((DST), (SIZE)); }
  CLIP_TO_DST((DST), (SIZE));                                                  \
 }
 TRUE_MOTION(dst, 4)
 TRUE_MOTION(dst, 8)
--- a/src/dsp/dec_msa.c
+++ b/src/dsp/dec_msa.c
@@ -11,7 +11,6 @@
 //
 // Author(s):  Prashant Patil   (prashant.patil@imgtec.com)
 #include "src/dsp/dsp.h"
 #if defined(WEBP_USE_MSA)
@@ -21,7 +20,8 @@
 //------------------------------------------------------------------------------
 // Transforms
-#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) {  \
+#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
  {                                                              \
    v4i32 a1_m, b1_m, c1_m, d1_m;                                \
    v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m;                \
    const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);         \
@@ -36,14 +36,14 @@
    d_tmp2_m = (in3 * sinpi8sqrt2) >> 16;                        \
    d1_m = d_tmp1_m + d_tmp2_m;                                  \
    BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
-}
+  }
 static void TransformOne(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst) {
  v8i16 input0, input1;
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  v16i8 dest0, dest1, dest2, dest3;
  LD_SH2(in, 8, input0, input1);
@@ -55,10 +55,10 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  LD_SB4(dst, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
-             res0, res1, res2, res3);
+             res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
-             res0, res1, res2, res3);
+             res3);
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -77,10 +77,10 @@ static void TransformTwo(const int16_t* WEBP_RESTRICT in,
 static void TransformWHT(const int16_t* WEBP_RESTRICT in,
                         int16_t* WEBP_RESTRICT out) {
  v8i16 input0, input1;
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
  v8i16 tmp0, tmp1, tmp2, tmp3;
  v8i16 out0, out1;
@@ -131,24 +131,23 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
  const int in2 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
  const int in3 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
-  v4i32 tmp0 = { 0 };
+  v4i32 tmp0 = {0};
  v4i32 out0 = __msa_fill_w(a + d4);
  v4i32 out1 = __msa_fill_w(a + c4);
  v4i32 out2 = __msa_fill_w(a - c4);
  v4i32 out3 = __msa_fill_w(a - d4);
  v4i32 res0, res1, res2, res3;
-  const v4i32 zero = { 0 };
+  const v4i32 zero = {0};
  v16u8 dest0, dest1, dest2, dest3;
  INSERT_W4_SW(in3, in2, -in2, -in3, tmp0);
-  ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0,
+  ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0, out0, out1, out2, out3);
       out0, out1, out2, out3);
  SRAI_W4_SW(out0, out1, out2, out3, 3);
  LD_UB4(dst, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
-             res0, res1, res2, res3);
+             res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
-             res0, res1, res2, res3);
+             res3);
  ADD4(res0, out0, res1, out1, res2, out2, res3, out3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, out0, out1);
@@ -159,26 +158,30 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
 // Edge filtering functions
-#define FLIP_SIGN2(in0, in1, out0, out1) {  \
+#define FLIP_SIGN2(in0, in1, out0, out1)   \
  {                                        \
    out0 = (v16i8)__msa_xori_b(in0, 0x80); \
    out1 = (v16i8)__msa_xori_b(in1, 0x80); \
-}
+  }
-#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) {  \
+#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                            \
    FLIP_SIGN2(in0, in1, out0, out1);                          \
    FLIP_SIGN2(in2, in3, out2, out3);                          \
-}
+  }
-#define FILT_VAL(q0_m, p0_m, mask, filt) do {  \
+#define FILT_VAL(q0_m, p0_m, mask, filt)    \
  do {                                      \
    v16i8 q0_sub_p0;                        \
    q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
    filt = __msa_adds_s_b(filt, q0_sub_p0); \
    filt = __msa_adds_s_b(filt, q0_sub_p0); \
    filt = __msa_adds_s_b(filt, q0_sub_p0); \
    filt = filt & mask;                     \
-} while (0)
+  } while (0)
-#define FILT2(q_m, p_m, q, p) do {            \
+#define FILT2(q_m, p_m, q, p)                  \
  do {                                         \
    u_r = SRAI_H(temp1, 7);                    \
    u_r = __msa_sat_s_h(u_r, 7);               \
    u_l = SRAI_H(temp3, 7);                    \
@@ -188,9 +191,10 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    p_m = __msa_adds_s_b(p_m, u);              \
    q = __msa_xori_b((v16u8)q_m, 0x80);        \
    p = __msa_xori_b((v16u8)p_m, 0x80);        \
-} while (0)
+  } while (0)
-#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) do {  \
+#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev)       \
  do {                                                  \
    v16i8 p1_m, p0_m, q0_m, q1_m;                       \
    v16i8 filt, t1, t2;                                 \
    const v16i8 cnst4b = __msa_ldi_b(4);                \
@@ -215,9 +219,10 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    q1 = __msa_xori_b((v16u8)q1_m, 0x80);               \
    p1_m = __msa_adds_s_b(p1_m, filt);                  \
    p1 = __msa_xori_b((v16u8)p1_m, 0x80);               \
-} while (0)
+  } while (0)
-#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) do {  \
+#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
  do {                                                  \
    v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m;           \
    v16i8 u, filt, t1, t2, filt_sign;                   \
    v8i16 filt_r, filt_l, u_r, u_l;                     \
@@ -257,12 +262,11 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    temp1 = temp1 + temp0;                              \
    temp3 = temp3 + temp2;                              \
    FILT2(q0_m, p0_m, q0, p0);                          \
-} while (0)
+  } while (0)
-#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in,                 \
+#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
-                     q0_in, q1_in, q2_in, q3_in,                 \
+                     limit_in, b_limit_in, thresh_in, hev_out, mask_out)     \
-                     limit_in, b_limit_in, thresh_in,            \
+  do {                                                                       \
                     hev_out, mask_out) do {                     \
    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;            \
    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;            \
    v16u8 flat_out;                                                          \
@@ -291,16 +295,18 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);                        \
    mask_out = (limit_in < mask_out);                                        \
    mask_out = __msa_xori_b(mask_out, 0xff);                                 \
-} while (0)
+  } while (0)
-#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) do { \
+#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride)       \
  do {                                                           \
    const uint16_t tmp0_h = __msa_copy_s_h((v8i16)in1, in1_idx); \
    const uint32_t tmp0_w = __msa_copy_s_w((v4i32)in0, in0_idx); \
    SW(tmp0_w, pdst);                                            \
    SH(tmp0_h, pdst + stride);                                   \
-} while (0)
+  } while (0)
-#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride) do { \
+#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride)  \
  do {                                                                  \
    uint8_t* ptmp1 = (uint8_t*)pdst;                                    \
    ST6x1_UB(in0, start_in0_idx, in1, start_in1_idx, ptmp1, 4);         \
    ptmp1 += stride;                                                    \
@@ -309,9 +315,10 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    ST6x1_UB(in0, start_in0_idx + 2, in1, start_in1_idx + 2, ptmp1, 4); \
    ptmp1 += stride;                                                    \
    ST6x1_UB(in0, start_in0_idx + 3, in1, start_in1_idx + 3, ptmp1, 4); \
-} while (0)
+  } while (0)
-#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) do {       \
+#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask)           \
  do {                                                              \
    v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2;               \
    const v16i8 cnst4b = __msa_ldi_b(4);                            \
    const v16i8 cnst3b = __msa_ldi_b(3);                            \
@@ -327,9 +334,10 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    p0_m = __msa_adds_s_b(p0_m, filt2);                             \
    q0_in = __msa_xori_b((v16u8)q0_m, 0x80);                        \
    p0_in = __msa_xori_b((v16u8)p0_m, 0x80);                        \
-} while (0)
+  } while (0)
-#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) do {    \
+#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask)        \
  do {                                                        \
    v16u8 p1_a_sub_q1, p0_a_sub_q0;                           \
                                                              \
    p0_a_sub_q0 = __msa_asub_u_b(p0, q0);                     \
@@ -338,10 +346,10 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
    p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0);   \
    mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1);          \
    mask = (mask <= b_limit);                                 \
-} while (0)
+  } while (0)
-static void VFilter16(uint8_t* src, int stride,
+static void VFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      int thresh_in) {
  uint8_t* ptemp = src - 4 * stride;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 mask, hev;
@@ -350,8 +358,8 @@ static void VFilter16(uint8_t* src, int stride,
  const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);
  LD_UB8(ptemp, stride, p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-               hev, mask);
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ptemp = src - 3 * stride;
  ST_UB4(p2, p1, p0, q0, ptemp, stride);
@@ -359,8 +367,8 @@ static void VFilter16(uint8_t* src, int stride,
  ST_UB2(q1, q2, ptemp, stride);
 }
-static void HFilter16(uint8_t* src, int stride,
+static void HFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      int thresh_in) {
  uint8_t* ptmp = src - 4;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 mask, hev;
@@ -374,11 +382,11 @@ static void HFilter16(uint8_t* src, int stride,
  LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7);
  ptmp += (8 * stride);
  LD_UB8(ptmp, stride, row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
+                      p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-               hev, mask);
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -420,8 +428,8 @@ static void HFilter16(uint8_t* src, int stride,
 }
 // on three inner edges
-static void VFilterHorEdge16i(uint8_t* src, int stride,
+static void VFilterHorEdge16i(uint8_t* src, int stride, int b_limit, int limit,
-                              int b_limit, int limit, int thresh) {
+                              int thresh) {
  v16u8 mask, hev;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  const v16u8 thresh0 = (v16u8)__msa_fill_b(thresh);
@@ -429,21 +437,21 @@ static void VFilterHorEdge16i(uint8_t* src, int stride,
  const v16u8 limit0 = (v16u8)__msa_fill_b(limit);
  LD_UB8((src - 4 * stride), stride, p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
-               hev, mask);
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ST_UB4(p1, p0, q0, q1, (src - 2 * stride), stride);
 }
-static void VFilter16i(uint8_t* src_y, int stride,
+static void VFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
-                       int b_limit, int limit, int thresh) {
+                       int thresh) {
  VFilterHorEdge16i(src_y + 4 * stride, stride, b_limit, limit, thresh);
  VFilterHorEdge16i(src_y + 8 * stride, stride, b_limit, limit, thresh);
  VFilterHorEdge16i(src_y + 12 * stride, stride, b_limit, limit, thresh);
 }
-static void HFilterVertEdge16i(uint8_t* src, int stride,
+static void HFilterVertEdge16i(uint8_t* src, int stride, int b_limit, int limit,
-                               int b_limit, int limit, int thresh) {
+                               int thresh) {
  v16u8 mask, hev;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
@@ -454,13 +462,13 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
  const v16u8 limit0 = (v16u8)__msa_fill_b(limit);
  LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(src - 4 + (8 * stride), stride,
+  LD_UB8(src - 4 + (8 * stride), stride, row8, row9, row10, row11, row12, row13,
-         row8, row9, row10, row11, row12, row13, row14, row15);
+         row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
+                      p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
-               hev, mask);
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
@@ -472,8 +480,8 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
  ST4x8_UB(tmp4, tmp5, src, stride);
 }
-static void HFilter16i(uint8_t* src_y, int stride,
+static void HFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
-                       int b_limit, int limit, int thresh) {
+                       int thresh) {
  HFilterVertEdge16i(src_y + 4, stride, b_limit, limit, thresh);
  HFilterVertEdge16i(src_y + 8, stride, b_limit, limit, thresh);
  HFilterVertEdge16i(src_y + 12, stride, b_limit, limit, thresh);
@@ -496,8 +504,8 @@ static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
  LD_UB8(ptmp_src_v, stride, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
  ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
  ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-               hev, mask);
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  p2_d = __msa_copy_s_d((v2i64)p2, 0);
  p1_d = __msa_copy_s_d((v2i64)p1, 0);
@@ -538,13 +546,13 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
  const v16u8 thresh = (v16u8)__msa_fill_b(thresh_in);
  LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(ptmp_src_v, stride,
+  LD_UB8(ptmp_src_v, stride, row8, row9, row10, row11, row12, row13, row14,
-         row8, row9, row10, row11, row12, row13, row14, row15);
+         row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
+                      p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-               hev, mask);
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -562,8 +570,8 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
 }
 static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
-                      uint8_t* WEBP_RESTRICT src_v, int stride,
+                      uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      int limit_in, int thresh_in) {
  uint64_t p1_d, p0_d, q0_d, q1_d;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
  v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
@@ -578,8 +586,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
  src_v += (5 * stride);
  ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
  ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-               hev, mask);
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  p1_d = __msa_copy_s_d((v2i64)p1, 0);
  p0_d = __msa_copy_s_d((v2i64)p0, 0);
@@ -594,8 +602,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
 }
 static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
-                      uint8_t* WEBP_RESTRICT src_v, int stride,
+                      uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      int limit_in, int thresh_in) {
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
  v16u8 row9, row10, row11, row12, row13, row14, row15;
@@ -605,13 +613,12 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
  const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);
  LD_UB8(src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(src_v, stride,
+  LD_UB8(src_v, stride, row8, row9, row10, row11, row12, row13, row14, row15);
-         row8, row9, row10, row11, row12, row13, row14, row15);
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
+                      p1, p0, q0, q1, q2, q3);
-                      p3, p2, p1, p0, q0, q1, q2, q3);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+               mask);
               hev, mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
  ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
@@ -645,11 +652,11 @@ static void SimpleHFilter16(uint8_t* src, int stride, int b_limit_in) {
  uint8_t* ptemp_src = src - 2;
  LD_UB8(ptemp_src, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(ptemp_src + 8 * stride, stride,
+  LD_UB8(ptemp_src + 8 * stride, stride, row8, row9, row10, row11, row12, row13,
-         row8, row9, row10, row11, row12, row13, row14, row15);
+         row14, row15);
-  TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+  TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
+                      row9, row10, row11, row12, row13, row14, row15, p1, p0,
-                      p1, p0, q0, q1);
+                      q0, q1);
  LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
  LPF_SIMPLE_FILT(p1, p0, q0, q1, mask);
  ILVRL_B2_SH(q0, p0, tmp1, tmp0);
@@ -694,7 +701,7 @@ static void DC4(uint8_t* dst) {   // DC
 static void TM4(uint8_t* dst) {
  const uint8_t* const ptemp = dst - BPS - 1;
  v8i16 T, d, r0, r1, r2, r3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(ptemp[0 * BPS]);
  const v8i16 L0 = (v8i16)__msa_fill_h(ptemp[1 * BPS]);
  const v8i16 L1 = (v8i16)__msa_fill_h(ptemp[2 * BPS]);
@@ -714,7 +721,7 @@ static void VE4(uint8_t* dst) {    // vertical
  const uint32_t val0 = LW(ptop + 0);
  const uint32_t val1 = LW(ptop + 4);
  uint32_t out;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = {0}, B, C, AC, B2, R;
  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
@@ -731,7 +738,7 @@ static void RD4(uint8_t* dst) {   // Down-right
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A, B, C, AC, B2, R, A1 = { 0 };
+  v16u8 A, B, C, AC, B2, R, A1 = {0};
  INSERT_W2_UB(val0, val1, A1);
  A = SLDI_UB(A1, A1, 12);
@@ -759,7 +766,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = {0}, B, C, AC, B2, R;
  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
@@ -799,7 +806,7 @@ static void DC16(uint8_t* dst) {   // DC
 static void TM16(uint8_t* dst) {
  int j;
  v8i16 d1, d2;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
  const v16i8 T = LD_SB(dst - BPS);
@@ -873,10 +880,11 @@ static void DC16NoTopLeft(uint8_t* dst) {   // DC with nothing
 // Chroma
-#define STORE8x8(out, dst) do {                 \
+#define STORE8x8(out, dst)                       \
  do {                                           \
    SD4(out, out, out, out, dst + 0 * BPS, BPS); \
    SD4(out, out, out, out, dst + 4 * BPS, BPS); \
-} while (0)
+  } while (0)
 static void DC8uv(uint8_t* dst) {  // DC
  uint32_t dc = 8;
@@ -900,7 +908,7 @@ static void DC8uv(uint8_t* dst) {   // DC
 static void TM8uv(uint8_t* dst) {
  int j;
  const v16i8 T1 = LD_SB(dst - BPS);
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 T = (v8i16)__msa_ilvr_b(zero, T1);
  const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
  const v8i16 d = T - TL;
--- a/src/dsp/dec_neon.c
+++ b/src/dsp/dec_neon.c
@@ -16,8 +16,8 @@
 #if defined(WEBP_USE_NEON)
 #include "src/dsp/neon.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/neon.h"
 //------------------------------------------------------------------------------
 // NxM Loading functions
@@ -60,10 +60,11 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* const src, int stride,
 #else  // WORK_AROUND_GCC
-#define LOADQ_LANE_32b(VALUE, LANE) do {                             \
+#define LOADQ_LANE_32b(VALUE, LANE)                                  \
  do {                                                               \
    (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE)); \
    src += stride;                                                   \
-} while (0)
+  } while (0)
 static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
                                      uint8x16_t* const p1,
@@ -111,10 +112,10 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
 #endif  // !WORK_AROUND_GCC
 static WEBP_INLINE void Load8x16_NEON(
-    const uint8_t* const src, int stride,
+    const uint8_t* const src, int stride, uint8x16_t* const p3,
-    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
-    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
-    uint8x16_t* const q2, uint8x16_t* const q3) {
+    uint8x16_t* const q3) {
  Load4x16_NEON(src - 2, stride, p3, p2, p1, p0);
  Load4x16_NEON(src + 2, stride, q0, q1, q2, q3);
 }
@@ -131,10 +132,10 @@ static WEBP_INLINE void Load16x4_NEON(const uint8_t* const src, int stride,
 }
 static WEBP_INLINE void Load16x8_NEON(
-    const uint8_t* const src, int stride,
+    const uint8_t* const src, int stride, uint8x16_t* const p3,
-    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
-    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
-    uint8x16_t* const q2, uint8x16_t* const q3) {
+    uint8x16_t* const q3) {
  Load16x4_NEON(src - 2 * stride, stride, p3, p2, p1, p0);
  Load16x4_NEON(src + 2 * stride, stride, q0, q1, q2, q3);
 }
@@ -220,8 +221,8 @@ static WEBP_INLINE void Load8x8x2T_NEON(
 #endif  // !WORK_AROUND_GCC
-static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v,
+static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v, uint8_t* const dst,
-                                      uint8_t* const dst, int stride) {
+                                      int stride) {
  vst2_lane_u8(dst + 0 * stride, v, 0);
  vst2_lane_u8(dst + 1 * stride, v, 1);
  vst2_lane_u8(dst + 2 * stride, v, 2);
@@ -244,8 +245,8 @@ static WEBP_INLINE void Store2x16_NEON(const uint8x16_t p0, const uint8x16_t q0,
 }
 #if !defined(WORK_AROUND_GCC)
-static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v,
+static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v, uint8_t* const dst,
-                                      uint8_t* const dst, int stride) {
+                                      int stride) {
  vst4_lane_u8(dst + 0 * stride, v, 0);
  vst4_lane_u8(dst + 1 * stride, v, 1);
  vst4_lane_u8(dst + 2 * stride, v, 2);
@@ -260,12 +261,10 @@ static WEBP_INLINE void Store4x16_NEON(const uint8x16_t p1, const uint8x16_t p0,
                                       const uint8x16_t q0, const uint8x16_t q1,
                                       uint8_t* const dst, int stride) {
  uint8x8x4_t lo, hi;
-  INIT_VECTOR4(lo,
+  INIT_VECTOR4(lo, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
-               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q1));
-               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(hi, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
-  INIT_VECTOR4(hi,
+               vget_high_u8(q1));
               vget_high_u8(p1), vget_high_u8(p0),
               vget_high_u8(q0), vget_high_u8(q1));
  Store4x8_NEON(lo, dst - 2 + 0 * stride, stride);
  Store4x8_NEON(hi, dst - 2 + 8 * stride, stride);
 }
@@ -285,9 +284,8 @@ static WEBP_INLINE void Store16x4_NEON(const uint8x16_t p1, const uint8x16_t p0,
 }
 static WEBP_INLINE void Store8x2x2_NEON(const uint8x16_t p0,
-                                        const uint8x16_t q0,
+                                        const uint8x16_t q0, uint8_t* const u,
-                                        uint8_t* const u, uint8_t* const v,
+                                        uint8_t* const v, int stride) {
                                        int stride) {
  // p0 and q0 contain the u+v samples packed in low/high halves.
  vst1_u8(u - stride, vget_low_u8(p0));
  vst1_u8(u, vget_low_u8(q0));
@@ -298,9 +296,8 @@ static WEBP_INLINE void Store8x2x2_NEON(const uint8x16_t p0,
 static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
                                        const uint8x16_t p0,
                                        const uint8x16_t q0,
-                                        const uint8x16_t q1,
+                                        const uint8x16_t q1, uint8_t* const u,
-                                        uint8_t* const u, uint8_t* const v,
+                                        uint8_t* const v, int stride) {
                                        int stride) {
  // The p1...q1 registers contain the u+v samples packed in low/high halves.
  Store8x2x2_NEON(p1, p0, u - stride, v - stride, stride);
  Store8x2x2_NEON(q0, q1, u + stride, v + stride, stride);
@@ -308,16 +305,17 @@ static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
 #if !defined(WORK_AROUND_GCC)
-#define STORE6_LANE(DST, VAL0, VAL1, LANE) do {   \
+#define STORE6_LANE(DST, VAL0, VAL1, LANE)   \
  do {                                       \
    vst3_lane_u8((DST) - 3, (VAL0), (LANE)); \
    vst3_lane_u8((DST) + 0, (VAL1), (LANE)); \
    (DST) += stride;                         \
-} while (0)
+  } while (0)
 static WEBP_INLINE void Store6x8x2_NEON(
    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
-    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
+    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2, uint8_t* u,
-    uint8_t* u, uint8_t* v, int stride) {
+    uint8_t* v, int stride) {
  uint8x8x3_t u0, u1, v0, v1;
  INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
  INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
@@ -345,16 +343,13 @@ static WEBP_INLINE void Store6x8x2_NEON(
 static WEBP_INLINE void Store4x8x2_NEON(const uint8x16_t p1,
                                        const uint8x16_t p0,
                                        const uint8x16_t q0,
-                                        const uint8x16_t q1,
+                                        const uint8x16_t q1, uint8_t* const u,
-                                        uint8_t* const u, uint8_t* const v,
+                                        uint8_t* const v, int stride) {
                                        int stride) {
  uint8x8x4_t u0, v0;
-  INIT_VECTOR4(u0,
+  INIT_VECTOR4(u0, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
-               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q1));
-               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(v0, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
-  INIT_VECTOR4(v0,
+               vget_high_u8(q1));
               vget_high_u8(p1), vget_high_u8(p0),
               vget_high_u8(q0), vget_high_u8(q1));
  vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
  vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
  vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
@@ -397,8 +392,7 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
 }
 static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
-                                    const int16x8_t row23,
+                                    const int16x8_t row23, uint8_t* const dst) {
                                    uint8_t* const dst) {
  uint32x2_t dst01 = vdup_n_u32(0);
  uint32x2_t dst23 = vdup_n_u32(0);
@@ -467,8 +461,7 @@ static int8x16_t GetBaseDelta0_NEON(const int8x16_t p0, const int8x16_t q0) {
 //------------------------------------------------------------------------------
 static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
-                                    const int8x16_t delta,
+                                    const int8x16_t delta, int8x16_t* const op0,
                                    int8x16_t* const op0,
                                    int8x16_t* const oq0) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
@@ -483,8 +476,8 @@ static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
 #if defined(WEBP_USE_INTRINSICS)
 static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
-                              const int8x16_t delta,
+                              const int8x16_t delta, uint8x16_t* const op0,
-                              uint8x16_t* const op0, uint8x16_t* const oq0) {
+                              uint8x16_t* const oq0) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
  const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
@@ -499,8 +492,8 @@ static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
 static void DoFilter2_NEON(const uint8x16_t p1, const uint8x16_t p0,
                           const uint8x16_t q0, const uint8x16_t q1,
-                           const uint8x16_t mask,
+                           const uint8x16_t mask, uint8x16_t* const op0,
-                           uint8x16_t* const op0, uint8x16_t* const oq0) {
+                           uint8x16_t* const oq0) {
  const int8x16_t p1s = FlipSign_NEON(p1);
  const int8x16_t p0s = FlipSign_NEON(p0);
  const int8x16_t q0s = FlipSign_NEON(q0);
@@ -533,6 +526,7 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
 #else
 // Load/Store vertical edge
 // clang-format off
 #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
  "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
  "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
@@ -600,9 +594,10 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
  "vand       q9, q9, q11                 \n"  /* apply filter mask */         \
  DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
  FLIP_SIGN_BIT2(p0, q0, q10)
 // clang-format on
 static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
-  __asm__ volatile (
+  __asm__ volatile(
      "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
      "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
@@ -610,7 +605,7 @@ static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
      "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
      "vld1.u8    {q12}, [%[p]]                  \n"  // q1
-    DO_FILTER2(q1, q2, q3, q12, %[thresh])
+      DO_FILTER2(q1, q2, q3, q12, % [thresh])  //
      "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
@@ -618,12 +613,11 @@ static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
      "vst1.u8    {q3}, [%[p]]                   \n"  // store oq0
      : [p] "+r"(p)
      : [stride] "r"(stride), [thresh] "r"(thresh)
-    : "memory", QRegs
+      : "memory", QRegs);
  );
 }
 static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
-  __asm__ volatile (
+  __asm__ volatile(
    "sub        r4, %[p], #2                   \n"  // base1 = p - 2
    "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
    "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
@@ -708,12 +702,11 @@ static uint8x16_t NeedsFilter2_NEON(const uint8x16_t p3, const uint8x16_t p2,
 //  4-points filter
-static void ApplyFilter4_NEON(
+static void ApplyFilter4_NEON(const int8x16_t p1, const int8x16_t p0,
    const int8x16_t p1, const int8x16_t p0,
                              const int8x16_t q0, const int8x16_t q1,
-    const int8x16_t delta0,
+                              const int8x16_t delta0, uint8x16_t* const op1,
-    uint8x16_t* const op1, uint8x16_t* const op0,
+                              uint8x16_t* const op0, uint8x16_t* const oq0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+                              uint8x16_t* const oq1) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
  const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);
@@ -727,8 +720,7 @@ static void ApplyFilter4_NEON(
  *oq1 = FlipSignBack_NEON(vqsubq_s8(q1, a3));  // clip(q1 - a3)
 }
-static void DoFilter4_NEON(
+static void DoFilter4_NEON(const uint8x16_t p1, const uint8x16_t p0,
    const uint8x16_t p1, const uint8x16_t p0,
                           const uint8x16_t q0, const uint8x16_t q1,
                           const uint8x16_t mask, const uint8x16_t hev_mask,
                           uint8x16_t* const op1, uint8x16_t* const op0,
@@ -761,12 +753,13 @@ static void DoFilter4_NEON(
 //  6-points filter
-static void ApplyFilter6_NEON(
+static void ApplyFilter6_NEON(const int8x16_t p2, const int8x16_t p1,
-    const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
+                              const int8x16_t p0, const int8x16_t q0,
-    const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
+                              const int8x16_t q1, const int8x16_t q2,
-    const int8x16_t delta,
+                              const int8x16_t delta, uint8x16_t* const op2,
-    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+                              uint8x16_t* const op1, uint8x16_t* const op0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+                              uint8x16_t* const oq0, uint8x16_t* const oq1,
                              uint8x16_t* const oq2) {
  // We have to compute: X = (9*a+63) >> 7, Y = (18*a+63)>>7, Z = (27*a+63) >> 7
  // Turns out, there's a common sub-expression S=9 * a - 1 that can be used
  // with the special vqrshrn_n_s16 rounding-shift-and-narrow instruction:
@@ -798,12 +791,13 @@ static void ApplyFilter6_NEON(
  *op2 = FlipSignBack_NEON(vqaddq_s8(p2, a3));  // clip(p2 + a3)
 }
-static void DoFilter6_NEON(
+static void DoFilter6_NEON(const uint8x16_t p2, const uint8x16_t p1,
-    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
+                           const uint8x16_t p0, const uint8x16_t q0,
-    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
+                           const uint8x16_t q1, const uint8x16_t q2,
                           const uint8x16_t mask, const uint8x16_t hev_mask,
-    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+                           uint8x16_t* const op2, uint8x16_t* const op1,
-    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+                           uint8x16_t* const op0, uint8x16_t* const oq0,
                           uint8x16_t* const oq1, uint8x16_t* const oq2) {
  // This is a fused version of DoFilter2() calling ApplyFilter2 directly
  const int8x16_t p2s = FlipSign_NEON(p2);
  const int8x16_t p1s = FlipSign_NEON(p1);
@@ -827,41 +821,41 @@ static void DoFilter6_NEON(
    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
    const int8x16_t complex_lf_delta =
        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
-    ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
+    ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta, op2, op1,
-                      op2, op1, op0, oq0, oq1, oq2);
+                      op0, oq0, oq1, oq2);
  }
 }
 // on macroblock edges
-static void VFilter16_NEON(uint8_t* p, int stride,
+static void VFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load16x8_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+                   &oq0, &oq1, &oq2);
    Store16x2_NEON(op2, op1, p - 2 * stride, stride);
    Store16x2_NEON(op0, oq0, p + 0 * stride, stride);
    Store16x2_NEON(oq1, oq2, p + 2 * stride, stride);
  }
 }
-static void HFilter16_NEON(uint8_t* p, int stride,
+static void HFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x16_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+                   &oq0, &oq1, &oq2);
    Store2x16_NEON(op2, op1, p - 2, stride);
    Store2x16_NEON(op0, oq0, p + 0, stride);
    Store2x16_NEON(oq1, oq2, p + 2, stride);
@@ -869,8 +863,8 @@ static void HFilter16_NEON(uint8_t* p, int stride,
 }
 // on three inner edges
-static void VFilter16i_NEON(uint8_t* p, int stride,
+static void VFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
-                            int thresh, int ithresh, int hev_thresh) {
+                            int hev_thresh) {
  uint32_t k;
  uint8x16_t p3, p2, p1, p0;
  Load16x4_NEON(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
@@ -893,8 +887,8 @@ static void VFilter16i_NEON(uint8_t* p, int stride,
 }
 #if !defined(WORK_AROUND_GCC)
-static void HFilter16i_NEON(uint8_t* p, int stride,
+static void HFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
-                            int thresh, int ithresh, int hev_thresh) {
+                            int hev_thresh) {
  uint32_t k;
  uint8x16_t p3, p2, p1, p0;
  Load4x16_NEON(p + 2, stride, &p3, &p2, &p1, &p0);
@@ -921,27 +915,27 @@ static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+                   &oq0, &oq1, &oq2);
    Store8x2x2_NEON(op2, op1, u - 2 * stride, v - 2 * stride, stride);
    Store8x2x2_NEON(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
    Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
  }
 }
 static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
+                           int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  u += 4 * stride;
  v += 4 * stride;
  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op1, op0, oq0, oq1;
    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -955,26 +949,26 @@ static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+                   &oq0, &oq1, &oq2);
    Store6x8x2_NEON(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
  }
 }
 static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
+                           int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  u += 4;
  v += 4;
  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+    const uint8x16_t mask =
-                                              ithresh, thresh);
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op1, op0, oq0, oq1;
    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -1058,9 +1052,9 @@ static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
                              uint8_t* WEBP_RESTRICT dst) {
  const int kBPS = BPS;
  // kC1, kC2. Padded because vld1.16 loads 8 bytes
-  const int16_t constants[4] = { kC1, kC2, 0, 0 };
+  const int16_t constants[4] = {kC1, kC2, 0, 0};
  /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
-  __asm__ volatile (
+  __asm__ volatile(
      "vld1.16         {q1, q2}, [%[in]]           \n"
      "vld1.16         {d0}, [%[constants]]        \n"
@@ -1204,12 +1198,17 @@ static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
-#define STORE_WHT(dst, col, rows) do {                  \
+#define STORE_WHT(dst, col, rows)            \
-  *dst = vgetq_lane_s32(rows.val[0], col); (dst) += 16; \
+  do {                                       \
-  *dst = vgetq_lane_s32(rows.val[1], col); (dst) += 16; \
+    *dst = vgetq_lane_s32(rows.val[0], col); \
-  *dst = vgetq_lane_s32(rows.val[2], col); (dst) += 16; \
+    (dst) += 16;                             \
-  *dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
+    *dst = vgetq_lane_s32(rows.val[1], col); \
-} while (0)
+    (dst) += 16;                             \
    *dst = vgetq_lane_s32(rows.val[2], col); \
    (dst) += 16;                             \
    *dst = vgetq_lane_s32(rows.val[3], col); \
    (dst) += 16;                             \
  } while (0)
 static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
                              int16_t* WEBP_RESTRICT out) {
@@ -1270,10 +1269,9 @@ static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
  const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
-  const uint64_t cd = (uint64_t)( d1 & 0xffff) <<  0 |
+  const uint64_t cd =
-                      (uint64_t)( c1 & 0xffff) << 16 |
+      (uint64_t)(d1 & 0xffff) << 0 | (uint64_t)(c1 & 0xffff) << 16 |
-                      (uint64_t)(-c1 & 0xffff) << 32 |
+      (uint64_t)(-c1 & 0xffff) << 32 | (uint64_t)(-d1 & 0xffff) << 48;
                      (uint64_t)(-d1 & 0xffff) << 48;
  const int16x4_t CD = vcreate_s16(cd);
  const int16x4_t B = vqadd_s16(A, CD);
  const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -248,9 +248,8 @@ static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
 // Loop Filter (Paragraph 15)
 // Compute abs(p - q) = subs(p - q) OR subs(q - p)
-#define MM_ABS(p, q)  _mm_or_si128(                                            \
+#define MM_ABS(p, q) \
-    _mm_subs_epu8((q), (p)),                                                   \
+  _mm_or_si128(_mm_subs_epu8((q), (p)), _mm_subs_epu8((p), (q)))
    _mm_subs_epu8((p), (q)))
 // Shift each byte of "x" by 3 bits while preserving the sign bit.
 static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
@@ -262,22 +261,24 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
  *x = _mm_packs_epi16(lo_1, hi_1);
 }
-#define FLIP_SIGN_BIT2(a, b) do {                                              \
+#define FLIP_SIGN_BIT2(a, b)          \
  do {                                \
    (a) = _mm_xor_si128(a, sign_bit); \
    (b) = _mm_xor_si128(b, sign_bit); \
-} while (0)
+  } while (0)
-#define FLIP_SIGN_BIT4(a, b, c, d) do {                                        \
+#define FLIP_SIGN_BIT4(a, b, c, d) \
  do {                             \
    FLIP_SIGN_BIT2(a, b);          \
    FLIP_SIGN_BIT2(c, d);          \
-} while (0)
+  } while (0)
 // input/output is uint8_t
 static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
                                       const __m128i* const p0,
                                       const __m128i* const q0,
-                                       const __m128i* const q1,
+                                       const __m128i* const q1, int hev_thresh,
-                                       int hev_thresh, __m128i* const not_hev) {
+                                       __m128i* const not_hev) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i t_1 = MM_ABS(*p1, *p0);
  const __m128i t_2 = MM_ABS(*q1, *q0);
@@ -339,8 +340,8 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
 static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
                                         const __m128i* const p0,
                                         const __m128i* const q0,
-                                         const __m128i* const q1,
+                                         const __m128i* const q1, int thresh,
-                                         int thresh, __m128i* const mask) {
+                                         __m128i* const mask) {
  const __m128i m_thresh = _mm_set1_epi8((char)thresh);
  const __m128i t1 = MM_ABS(*p1, *q1);  // abs(p1 - q1)
  const __m128i kFE = _mm_set1_epi8((char)0xFE);
@@ -503,8 +504,7 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
 }
 static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
-                                      const uint8_t* const r8,
+                                      const uint8_t* const r8, int stride,
                                      int stride,
                                      __m128i* const p1, __m128i* const p0,
                                      __m128i* const q0, __m128i* const q1) {
  // Assume the pixels around the edge (|) are numbered as follows
@@ -539,8 +539,8 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
  }
 }
-static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
+static WEBP_INLINE void Store4x4_SSE2(__m128i* const x, uint8_t* dst,
-                                      uint8_t* dst, int stride) {
+                                      int stride) {
  int i;
  for (i = 0; i < 4; ++i, dst += stride) {
    WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
@@ -552,9 +552,8 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
 static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
                                       const __m128i* const p0,
                                       const __m128i* const q0,
-                                       const __m128i* const q1,
+                                       const __m128i* const q1, uint8_t* r0,
-                                       uint8_t* r0, uint8_t* r8,
+                                       uint8_t* r8, int stride) {
                                       int stride) {
  __m128i t1, p1_s, p0_s, q0_s, q1_s;
  // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -636,50 +635,55 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)
-#define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
+#define MAX_DIFF1(p3, p2, p1, p0, m)       \
  do {                                     \
    (m) = MM_ABS(p1, p0);                  \
    (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
    (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
-} while (0)
+  } while (0)
-#define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
+#define MAX_DIFF2(p3, p2, p1, p0, m)       \
  do {                                     \
    (m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
    (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
    (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
-} while (0)
+  } while (0)
-#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) do {                          \
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4)          \
  do {                                                    \
    (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
    (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
    (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
    (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
-} while (0)
+  } while (0)
-#define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
+#define LOADUV_H_EDGE(p, u, v, stride)                           \
  do {                                                           \
    const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
    const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
    (p) = _mm_unpacklo_epi64(U, V);                              \
-} while (0)
+  } while (0)
-#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) do {                     \
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) \
  do {                                                \
    LOADUV_H_EDGE(e1, u, v, 0 * (stride));            \
    LOADUV_H_EDGE(e2, u, v, 1 * (stride));            \
    LOADUV_H_EDGE(e3, u, v, 2 * (stride));            \
    LOADUV_H_EDGE(e4, u, v, 3 * (stride));            \
-} while (0)
+  } while (0)
-#define STOREUV(p, u, v, stride) do {                                          \
+#define STOREUV(p, u, v, stride)                   \
  do {                                             \
    _mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
    (p) = _mm_srli_si128(p, 8);                    \
    _mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
-} while (0)
+  } while (0)
 static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
                                         const __m128i* const p0,
                                         const __m128i* const q0,
-                                         const __m128i* const q1,
+                                         const __m128i* const q1, int thresh,
-                                         int thresh, int ithresh,
+                                         int ithresh, __m128i* const mask) {
                                         __m128i* const mask) {
  const __m128i it = _mm_set1_epi8(ithresh);
  const __m128i diff = _mm_subs_epu8(*mask, it);
  const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
@@ -689,8 +693,8 @@ static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
 }
 // on macroblock edges
-static void VFilter16_SSE2(uint8_t* p, int stride,
+static void VFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  __m128i t1;
  __m128i mask;
  __m128i p2, p1, p0, q0, q1, q2;
@@ -715,8 +719,8 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
  _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
 }
-static void HFilter16_SSE2(uint8_t* p, int stride,
+static void HFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  __m128i mask;
  __m128i p3, p2, p1, p0, q0, q1, q2, q3;
@@ -735,8 +739,8 @@ static void HFilter16_SSE2(uint8_t* p, int stride,
 }
 // on three inner edges
-static void VFilter16i_SSE2(uint8_t* p, int stride,
+static void VFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
-                            int thresh, int ithresh, int hev_thresh) {
+                            int hev_thresh) {
  int k;
  __m128i p3, p2, p1, p0;  // loop invariants
@@ -768,8 +772,8 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
  }
 }
-static void HFilter16i_SSE2(uint8_t* p, int stride,
+static void HFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
-                            int thresh, int ithresh, int hev_thresh) {
+                            int hev_thresh) {
  int k;
  __m128i p3, p2, p1, p0;  // loop invariants
@@ -843,8 +847,8 @@ static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }
 static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
+                           int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;
@@ -870,8 +874,8 @@ static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }
 static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
+                           int stride, int thresh, int ithresh,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;
  Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0);  // p3, p2, p1, p0
@@ -930,7 +934,7 @@ static void LD4_SSE2(uint8_t* dst) {   // Down-Left
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -951,8 +955,8 @@ static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
@@ -977,8 +981,8 @@ static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
  const uint32_t extra_out =
      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
@@ -1004,7 +1008,7 @@ static void RD4_SSE2(uint8_t* dst) {   // Down-right
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
-  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
--- a/src/dsp/dec_sse41.c
+++ b/src/dsp/dec_sse41.c
@@ -17,10 +17,10 @@
 #include <emmintrin.h>
 #include <smmintrin.h>
 #include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/cpu.h"
 #include "src/utils/utils.h"
 #include "src/webp/types.h"
 static void HE16_SSE41(uint8_t* dst) {  // horizontal
  int j;
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -44,7 +44,6 @@ extern "C" {
 #define WEBP_RESTRICT
 #endif
 //------------------------------------------------------------------------------
 // Init stub generator
@@ -95,8 +94,7 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
 // Compute the average (DC) of four 4x4 blocks.
 // Each sub-4x4 block #i sum is stored in dc[i].
-typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
+typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]);
                              uint32_t dc[4]);
 extern VP8MeanMetric VP8Mean16x4;
 typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
@@ -132,8 +130,8 @@ typedef struct {
  int last_non_zero;
 } VP8Histogram;
 typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
-                          const uint8_t* WEBP_RESTRICT pred,
+                          const uint8_t* WEBP_RESTRICT pred, int start_block,
-                          int start_block, int end_block,
+                          int end_block,
                          VP8Histogram* WEBP_RESTRICT const histo);
 extern VP8CHisto VP8CollectHistogram;
 // General-purpose util function to help VP8CollectHistogram().
@@ -248,8 +246,8 @@ extern VP8SimpleFilterFunc VP8SimpleVFilter16i;  // filter 3 inner edges
 extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
 // regular filter (on both macroblock edges and inner edges)
-typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, int thresh,
-                                  int thresh, int ithresh, int hev_t);
+                                  int ithresh, int hev_t);
 typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
                                    uint8_t* WEBP_RESTRICT v, int stride,
                                    int thresh, int ithresh, int hev_t);
@@ -344,8 +342,8 @@ extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
 // the U/V one.
 extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
                                   uint8_t* WEBP_RESTRICT u,
-                                   uint8_t* WEBP_RESTRICT v,
+                                   uint8_t* WEBP_RESTRICT v, int src_width,
-                                   int src_width, int do_store);
+                                   int do_store);
 // Convert a row of accumulated (four-values) of rgba32 toward U/V
 extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
@@ -361,8 +359,8 @@ extern void (*WebPConvertBGRToY)(const uint8_t* WEBP_RESTRICT bgr,
 // used for plain-C fallback.
 extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
                                  uint8_t* WEBP_RESTRICT u,
-                                  uint8_t* WEBP_RESTRICT v,
+                                  uint8_t* WEBP_RESTRICT v, int src_width,
-                                  int src_width, int do_store);
+                                  int do_store);
 extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
                                    uint8_t* WEBP_RESTRICT u,
                                    uint8_t* WEBP_RESTRICT v, int width);
@@ -403,8 +401,7 @@ extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
 extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
 // Main entry calls:
-extern void WebPRescalerImportRow(
+extern void WebPRescalerImportRow(struct WebPRescaler* WEBP_RESTRICT const wrk,
    struct WebPRescaler* WEBP_RESTRICT const wrk,
                                  const uint8_t* WEBP_RESTRICT src);
 // Export one row (starting at x_out position) from rescaler.
 extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
@@ -417,12 +414,12 @@ void WebPRescalerDspInit(void);
 // Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
 // alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
-extern void (*WebPApplyAlphaMultiply)(
+extern void (*WebPApplyAlphaMultiply)(uint8_t* rgba, int alpha_first, int w,
-    uint8_t* rgba, int alpha_first, int w, int h, int stride);
+                                      int h, int stride);
 // Same, but specifically for RGBA4444 format
-extern void (*WebPApplyAlphaMultiply4444)(
+extern void (*WebPApplyAlphaMultiply4444)(uint8_t* rgba4444, int w, int h,
-    uint8_t* rgba4444, int w, int h, int stride);
+                                          int stride);
 // Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
 // Returns true if alpha[] plane has non-trivial values different from 0xff.
@@ -442,8 +439,7 @@ extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha,
 // Returns true if there's only trivial 0xff alpha values.
 extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb,
                               int argb_stride, int width, int height,
-                               uint8_t* WEBP_RESTRICT alpha,
+                               uint8_t* WEBP_RESTRICT alpha, int alpha_stride);
                               int alpha_stride);
 // Extract the green values from 32b values in argb[] and pack them into alpha[]
 // (this is the opposite of WebPDispatchAlphaToGreen).
@@ -462,8 +458,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
 // Same for a row of single values, with side alpha values.
 extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-                           const uint8_t* WEBP_RESTRICT const alpha,
+                           const uint8_t* WEBP_RESTRICT const alpha, int width,
-                           int width, int inverse);
+                           int inverse);
 // Same as WebPMultRow(), but for several 'num_rows' rows.
 void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
@@ -472,8 +468,8 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
 // Plain-C versions, used as fallback by some implementations.
 void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-                   const uint8_t* WEBP_RESTRICT const alpha,
+                   const uint8_t* WEBP_RESTRICT const alpha, int width,
-                   int width, int inverse);
+                   int inverse);
 void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
 #ifdef WORDS_BIGENDIAN
@@ -481,15 +477,15 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
 extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT r,
                            const uint8_t* WEBP_RESTRICT g,
-                            const uint8_t* WEBP_RESTRICT b,
+                            const uint8_t* WEBP_RESTRICT b, int len,
-                            int len, uint32_t* WEBP_RESTRICT out);
+                            uint32_t* WEBP_RESTRICT out);
 #endif
 // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
 extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
                           const uint8_t* WEBP_RESTRICT g,
-                           const uint8_t* WEBP_RESTRICT b,
+                           const uint8_t* WEBP_RESTRICT b, int len, int step,
-                           int len, int step, uint32_t* WEBP_RESTRICT out);
+                           uint32_t* WEBP_RESTRICT out);
 // This function returns true if src[i] contains a value different from 0xff.
 extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
@@ -514,8 +510,8 @@ typedef enum {     // Filter types.
  WEBP_FILTER_FAST
 } WEBP_FILTER_TYPE;
-typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
+typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in, int width,
-                               int width, int height, int stride,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT out);
 // In-place un-filtering.
 // Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -26,9 +26,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 }
 #if !WEBP_NEON_OMIT_C_CODE
-static WEBP_INLINE int clip_max(int v, int max) {
+static WEBP_INLINE int clip_max(int v, int max) { return (v > max) ? max : v; }
  return (v > max) ? max : v;
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
 //------------------------------------------------------------------------------
@@ -68,7 +66,7 @@ static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
                               int start_block, int end_block,
                               VP8Histogram* WEBP_RESTRICT const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int k;
    int16_t out[16];
@@ -105,7 +103,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
  }
 }
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -154,8 +151,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
 static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
                         const int16_t* WEBP_RESTRICT in,
-                         uint8_t* WEBP_RESTRICT dst,
+                         uint8_t* WEBP_RESTRICT dst, int do_two) {
                         int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
@@ -183,13 +179,13 @@ static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
  }
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
-    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
+    const int a1 = (tmp[4 + i] + tmp[8 + i]);
-    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
+    const int a2 = (tmp[4 + i] - tmp[8 + i]);
    const int a3 = (tmp[0 + i] - tmp[12 + i]);
    out[0 + i] = (a0 + a1 + 7) >> 4;  // 12b
    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
    out[8 + i] = (a0 - a1 + 7) >> 4;
-    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
+    out[12 + i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
  }
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
@@ -219,16 +215,16 @@ static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
  }
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
-    const int a1 = (tmp[4 + i] + tmp[12+ i]);
+    const int a1 = (tmp[4 + i] + tmp[12 + i]);
-    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a2 = (tmp[4 + i] - tmp[12 + i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;  // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
-    out[ 0 + i] = b0 >> 1;     // 15b
+    out[0 + i] = b0 >> 1;  // 15b
-    out[ 4 + i] = b1 >> 1;
+    out[4 + i] = b1 >> 1;
-    out[ 8 + i] = b2 >> 1;
+    out[8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
 }
@@ -303,8 +299,8 @@ static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
 static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
                               const uint8_t* WEBP_RESTRICT left,
-                               const uint8_t* WEBP_RESTRICT top,
+                               const uint8_t* WEBP_RESTRICT top, int size,
-                               int size, int round, int shift) {
+                               int round, int shift) {
  int DC = 0;
  int j;
  if (top != NULL) {
@@ -373,9 +369,9 @@ static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
 static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const uint8_t vals[4] = {
      AVG3(top[-1], top[0], top[1]),
-    AVG3(top[ 0], top[1], top[2]),
+      AVG3(top[0], top[1], top[2]),
-    AVG3(top[ 1], top[2], top[3]),
+      AVG3(top[1], top[2], top[3]),
-    AVG3(top[ 2], top[3], top[4])
+      AVG3(top[2], top[3], top[4]),
  };
  int i;
  for (i = 0; i < 4; ++i) {
@@ -495,8 +491,7 @@ static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -561,8 +556,7 @@ static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
 #if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
-                              const uint8_t* WEBP_RESTRICT b,
+                              const uint8_t* WEBP_RESTRICT b, int w, int h) {
                              int w, int h) {
  int count = 0;
  int y, x;
  for (y = 0; y < h; ++y) {
@@ -637,17 +631,17 @@ static int TTransform(const uint8_t* WEBP_RESTRICT in,
  // vertical pass
  for (i = 0; i < 4; ++i, ++w) {
    const int a0 = tmp[0 + i] + tmp[8 + i];
-    const int a1 = tmp[4 + i] + tmp[12+ i];
+    const int a1 = tmp[4 + i] + tmp[12 + i];
-    const int a2 = tmp[4 + i] - tmp[12+ i];
+    const int a2 = tmp[4 + i] - tmp[12 + i];
    const int a3 = tmp[0 + i] - tmp[8 + i];
    const int b0 = a0 + a1;
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
-    sum += w[ 0] * abs(b0);
+    sum += w[0] * abs(b0);
-    sum += w[ 4] * abs(b1);
+    sum += w[4] * abs(b1);
-    sum += w[ 8] * abs(b2);
+    sum += w[8] * abs(b2);
    sum += w[12] * abs(b3);
  }
  return sum;
@@ -680,9 +674,8 @@ static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
 //
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static const uint8_t kZigzag[16] = {
+static const uint8_t kZigzag[16] = {0, 1,  4,  8,  5, 2,  3,  6,
-  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+                                    9, 12, 13, 10, 7, 11, 14, 15};
 };
 // Simple quantization
 static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
--- a/src/dsp/enc_mips32.c
+++ b/src/dsp/enc_mips32.c
@@ -18,8 +18,8 @@
 #if defined(WEBP_USE_MIPS32)
 #include "src/dsp/mips_macro.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 static const int kC1 = WEBP_TRANSFORM_AC3_C1;
 static const int kC2 = WEBP_TRANSFORM_AC3_C2;
@@ -30,6 +30,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 // A..D - offsets in bytes to load from in buffer
 // TEMP0..TEMP3 - registers for corresponding tmp elements
 // TEMP4..TEMP5 - temporary registers
 // clang-format off
 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
  "lh      %[temp16],      " #A "(%[temp20])                 \n\t"          \
  "lh      %[temp18],      " #B "(%[temp20])                 \n\t"          \
@@ -107,6 +108,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
  "sb      %[" #TEMP4 "],    1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
  "sb      %[" #TEMP8 "],    2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
  "sb      %[" #TEMP12 "],   3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
 // clang-format on
 // Does one or two inverse transforms.
 static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -118,27 +120,26 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
  const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
  __asm__ volatile(
-    "lw      %[temp20],      4(%[args])                      \n\t"
+      "lw      %[temp20],      4(%[args])                      \n\t"        //
-    VERTICAL_PASS(0, 16,  8, 24, temp4,  temp0,  temp1,  temp2,  temp3)
+      VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3)        //
-    VERTICAL_PASS(2, 18, 10, 26, temp8,  temp4,  temp5,  temp6,  temp7)
+      VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7)       //
-    VERTICAL_PASS(4, 20, 12, 28, temp12, temp8,  temp9,  temp10, temp11)
+      VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)    //
-    VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
+      VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)  //
-    HORIZONTAL_PASS(0, temp0, temp4, temp8,  temp12)
+      HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)   //
-    HORIZONTAL_PASS(1, temp1, temp5, temp9,  temp13)
+      HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)   //
-    HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
+      HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)  //
-    HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
+      HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)  //
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
-      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+        [temp9] "=&r"(temp9), [temp10] "=&r"(temp10), [temp11] "=&r"(temp11),
-      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+        [temp12] "=&r"(temp12), [temp13] "=&r"(temp13), [temp14] "=&r"(temp14),
-      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+        [temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17),
-      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+        [temp18] "=&r"(temp18), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)
-    : [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
+      : [args] "r"(args), [kC1] "r"(kC1), [kC2] "r"(kC2)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
 }
 static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -158,6 +159,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
 // J - offset in bytes (kZigzag[n] * 2)
 // K - offset in bytes (kZigzag[n] * 4)
 // N - offset in bytes (n * 2)
 // clang-format off
 #define QUANTIZE_ONE(J, K, N)                                               \
  "lh           %[temp0],       " #J "(%[ppin])                     \n\t"   \
  "lhu          %[temp1],       " #J "(%[ppsharpen])                \n\t"   \
@@ -184,6 +186,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
 "2:                                                                 \n\t"   \
  "sh           %[temp5],       " #J "(%[ppin])                     \n\t"   \
  "sh           %[level],       " #N "(%[pout])                     \n\t"
 // clang-format on
 static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
                                const VP8Matrix* const mtx) {
@@ -200,34 +203,30 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
  const uint32_t* ppbias = &mtx->bias[0];
  __asm__ volatile(
-    QUANTIZE_ONE( 0,  0,  0)
+      QUANTIZE_ONE(0, 0, 0)     //
-    QUANTIZE_ONE( 2,  4,  2)
+      QUANTIZE_ONE(2, 4, 2)     //
-    QUANTIZE_ONE( 8, 16,  4)
+      QUANTIZE_ONE(8, 16, 4)    //
-    QUANTIZE_ONE(16, 32,  6)
+      QUANTIZE_ONE(16, 32, 6)   //
-    QUANTIZE_ONE(10, 20,  8)
+      QUANTIZE_ONE(10, 20, 8)   //
-    QUANTIZE_ONE( 4,  8, 10)
+      QUANTIZE_ONE(4, 8, 10)    //
-    QUANTIZE_ONE( 6, 12, 12)
+      QUANTIZE_ONE(6, 12, 12)   //
-    QUANTIZE_ONE(12, 24, 14)
+      QUANTIZE_ONE(12, 24, 14)  //
-    QUANTIZE_ONE(18, 36, 16)
+      QUANTIZE_ONE(18, 36, 16)  //
-    QUANTIZE_ONE(24, 48, 18)
+      QUANTIZE_ONE(24, 48, 18)  //
-    QUANTIZE_ONE(26, 52, 20)
+      QUANTIZE_ONE(26, 52, 20)  //
-    QUANTIZE_ONE(20, 40, 22)
+      QUANTIZE_ONE(20, 40, 22)  //
-    QUANTIZE_ONE(14, 28, 24)
+      QUANTIZE_ONE(14, 28, 24)  //
-    QUANTIZE_ONE(22, 44, 26)
+      QUANTIZE_ONE(22, 44, 26)  //
-    QUANTIZE_ONE(28, 56, 28)
+      QUANTIZE_ONE(28, 56, 28)  //
-    QUANTIZE_ONE(30, 60, 30)
+      QUANTIZE_ONE(30, 60, 30)  //
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level)
-      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
+      : [pout] "r"(pout), [ppin] "r"(ppin), [ppiq] "r"(ppiq),
-      [level]"=&r"(level)
+        [max_level] "r"(max_level), [ppbias] "r"(ppbias),
-    : [pout]"r"(pout), [ppin]"r"(ppin),
+        [ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
-      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
+      : "memory", "hi", "lo");
      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
    : "memory", "hi", "lo"
  );
  // moved out from macro to increase possibility for earlier breaking
  for (i = 15; i >= 0; i--) {
@@ -251,6 +250,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
 // A - offset in bytes to load from a and b buffers
 // E..H - offsets in bytes to store first results to tmp buffer
 // E1..H1 - offsets in bytes to store second results to tmp buffer
 // clang-format off
 #define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1)                  \
  "lbu    %[temp0],  0+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
  "lbu    %[temp1],  1+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
@@ -358,6 +358,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
  "msub   %[temp5],  %[temp8]                \n\t"                \
  "msub   %[temp6],  %[temp0]                \n\t"                \
  "msub   %[temp7],  %[temp1]                \n\t"
 // clang-format on
 static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
                           const uint8_t* WEBP_RESTRICT const b,
@@ -366,28 +367,27 @@ static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  __asm__ volatile(
-    HORIZONTAL_PASS(0,   0,  4,  8, 12,    64,  68,  72,  76)
+      HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76)         //
-    HORIZONTAL_PASS(1,  16, 20, 24, 28,    80,  84,  88,  92)
+      HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92)      //
-    HORIZONTAL_PASS(2,  32, 36, 40, 44,    96, 100, 104, 108)
+      HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108)   //
-    HORIZONTAL_PASS(3,  48, 52, 56, 60,   112, 116, 120, 124)
+      HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124)  //
      "mthi   $zero                             \n\t"
-    "mtlo   $zero                             \n\t"
+      "mtlo   $zero                             \n\t"                 //
-    VERTICAL_PASS( 0, 16, 32, 48,     64, 80,  96, 112,   0,  8, 16, 24)
+      VERTICAL_PASS(0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24)     //
-    VERTICAL_PASS( 4, 20, 36, 52,     68, 84, 100, 116,   2, 10, 18, 26)
+      VERTICAL_PASS(4, 20, 36, 52, 68, 84, 100, 116, 2, 10, 18, 26)   //
-    VERTICAL_PASS( 8, 24, 40, 56,     72, 88, 104, 120,   4, 12, 20, 28)
+      VERTICAL_PASS(8, 24, 40, 56, 72, 88, 104, 120, 4, 12, 20, 28)   //
-    VERTICAL_PASS(12, 28, 44, 60,     76, 92, 108, 124,   6, 14, 22, 30)
+      VERTICAL_PASS(12, 28, 44, 60, 76, 92, 108, 124, 6, 14, 22, 30)  //
      "mflo   %[temp0]                          \n\t"
      "sra    %[temp1],  %[temp0],  31          \n\t"
      "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"
      "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"
      "sra    %[temp0],  %[temp0],  5           \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
-    : [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
+      : [a] "r"(a), [b] "r"(b), [w] "r"(w), [tmp] "r"(tmp)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return temp0;
 }
@@ -412,6 +412,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
 // temp0..temp15 holds tmp[0]..tmp[15]
 // A - offset in bytes to load from src and ref buffers
 // TEMP0..TEMP3 - registers for corresponding tmp elements
 // clang-format off
 #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                  \
  "lw     %[" #TEMP1 "],  0(%[args])                           \n\t"    \
  "lw     %[" #TEMP2 "],  4(%[args])                           \n\t"    \
@@ -477,6 +478,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
  "sh     %[" #TEMP4 "],  " #C "(%[temp20])              \n\t"    \
  "sh     %[" #TEMP8 "],  " #D "(%[temp20])              \n\t"    \
  "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"
 // clang-format on
 static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
                              const uint8_t* WEBP_RESTRICT ref,
@@ -486,8 +488,8 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
  int temp17, temp18, temp19, temp20;
  const int c2217 = 2217;
  const int c5352 = 5352;
-  const int* const args[3] =
+  const int* const args[3] = {(const int*)src, (const int*)ref,
-      { (const int*)src, (const int*)ref, (const int*)out };
+                              (const int*)out};
  __asm__ volatile(
    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)
@@ -517,6 +519,7 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
 #if !defined(WORK_AROUND_GCC)
 // clang-format off
 #define GET_SSE_INNER(A, B, C, D)                               \
  "lbu     %[temp0],    " #A "(%[a])                 \n\t"      \
  "lbu     %[temp1],    " #A "(%[b])                 \n\t"      \
@@ -534,6 +537,7 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
  "madd    %[temp2],    %[temp2]                     \n\t"      \
  "madd    %[temp4],    %[temp4]                     \n\t"      \
  "madd    %[temp6],    %[temp6]                     \n\t"
 // clang-format on
 #define GET_SSE(A, B, C, D)             \
  GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
@@ -549,30 +553,29 @@ static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
  __asm__ volatile(
      "mult   $zero,    $zero                            \n\t"
-     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)      //
-     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)      //
-     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)      //
-     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)      //
-     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)      //
-     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)      //
-     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)      //
-     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)      //
-     GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
+      GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)      //
-     GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
+      GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)      //
-     GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+      GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)  //
-     GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+      GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)  //
-     GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+      GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)  //
-     GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+      GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)  //
-     GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+      GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)  //
-     GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+      GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)  //
      "mflo    %[count]                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -584,22 +587,21 @@ static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
  __asm__ volatile(
      "mult   $zero,    $zero                            \n\t"
-     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)  //
-     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)  //
-     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)  //
-     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)  //
-     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)  //
-     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)  //
-     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)  //
-     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)  //
      "mflo    %[count]                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -611,18 +613,17 @@ static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
  __asm__ volatile(
      "mult   $zero,    $zero                            \n\t"
-     GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)  //
-     GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)  //
-     GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)  //
-     GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)  //
      "mflo    %[count]                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -634,15 +635,14 @@ static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
  __asm__ volatile(
      "mult   $zero,    $zero                            \n\t"
-     GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+      GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)  //
      "mflo    %[count]                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
--- a/src/dsp/enc_mips_dsp_r2.c
+++ b/src/dsp/enc_mips_dsp_r2.c
@@ -25,6 +25,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 // O - output
 // I - input (macro doesn't change it)
 // clang-format off
 #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7,                      \
                          I0, I1, I2, I3, I4, I5, I6, I7)                      \
  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]     \n\t"            \
@@ -140,6 +141,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
  "sh              %[" #TEMP4 "],   " #C "(%[temp20])               \n\t"      \
  "sh              %[" #TEMP8 "],   " #D "(%[temp20])               \n\t"      \
  "sh              %[" #TEMP12 "],  " #B "(%[temp20])               \n\t"
 // clang-format on
 static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
                                 const uint8_t* WEBP_RESTRICT ref,
@@ -149,10 +151,10 @@ static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
  int temp17, temp18, temp19, temp20;
-  const int* const args[3] =
+  const int* const args[3] = {(const int*)src, (const int*)ref,
-      { (const int*)src, (const int*)ref, (const int*)out };
+                              (const int*)out};
-  __asm__ volatile (
+  __asm__ volatile(
    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)
    HORIZONTAL_PASS(1, temp4,  temp5,  temp6,  temp7)
    HORIZONTAL_PASS(2, temp8,  temp9,  temp10, temp11)
@@ -178,7 +180,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp1],   0(%[in])                 \n\t"
    "ulw              %[temp2],   16(%[in])                \n\t"
    LOAD_IN_X2(temp5, temp6, 24, 26)
@@ -250,13 +252,14 @@ static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
  }
 }
 // clang-format off
 static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
                              const uint8_t* WEBP_RESTRICT const b,
                              const uint16_t* WEBP_RESTRICT const w) {
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, a,
                        0, 0, 0, 0,
                        0, 1, 2, 3,
@@ -317,6 +320,7 @@ static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
  );
  return abs(temp3 - temp17) >> 5;
 }
 // clang-format on
 static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
                                const uint8_t* WEBP_RESTRICT const b,
@@ -334,6 +338,7 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
 //------------------------------------------------------------------------------
 // Intra predictions
 // clang-format off
 #define FILL_PART(J, SIZE)                                            \
    "usw        %[value],  0+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
    "usw        %[value],  4+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
@@ -342,9 +347,10 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
    "usw        %[value], 12+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
  ".endif                                                  \n\t"
-#define FILL_8_OR_16(DST, VALUE, SIZE) do {                         \
+#define FILL_8_OR_16(DST, VALUE, SIZE)                                \
  do {                                                                \
    int value = (VALUE);                                              \
-  __asm__ volatile (                                                \
+    __asm__ volatile(                                                 \
      "replv.qb   %[value],  %[value]                      \n\t"      \
      FILL_PART( 0, SIZE)                                             \
      FILL_PART( 1, SIZE)                                             \
@@ -368,18 +374,19 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
      : [dst]"r"((DST))                                               \
      : "memory"                                                      \
    );                                                                \
-} while (0)
+  } while (0)
 // clang-format on
 #define VERTICAL_PRED(DST, TOP, SIZE)                                      \
-static WEBP_INLINE void VerticalPred##SIZE(                                    \
+  static WEBP_INLINE void VerticalPred##SIZE(                              \
-    uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) {        \
+      uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(TOP)) {    \
    int j;                                                                 \
    if ((TOP)) {                                                           \
      for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
    } else {                                                               \
      FILL_8_OR_16((DST), 127, (SIZE));                                    \
    }                                                                      \
-}
+  }
 VERTICAL_PRED(dst, top, 8)
 VERTICAL_PRED(dst, top, 16)
@@ -387,8 +394,8 @@ VERTICAL_PRED(dst, top, 16)
 #undef VERTICAL_PRED
 #define HORIZONTAL_PRED(DST, LEFT, SIZE)                                 \
-static WEBP_INLINE void HorizontalPred##SIZE(                                  \
+  static WEBP_INLINE void HorizontalPred##SIZE(                          \
-    uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) {       \
+      uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(LEFT)) { \
    if (LEFT) {                                                          \
      int j;                                                             \
      for (j = 0; j < (SIZE); ++j) {                                     \
@@ -397,7 +404,7 @@ static WEBP_INLINE void HorizontalPred##SIZE(                                  \
    } else {                                                             \
      FILL_8_OR_16((DST), 129, (SIZE));                                  \
    }                                                                    \
-}
+  }
 HORIZONTAL_PRED(dst, left, 8)
 HORIZONTAL_PRED(dst, left, 16)
@@ -420,10 +427,12 @@ HORIZONTAL_PRED(dst, left, 16)
  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t" \
  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"
-#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE) do {                              \
+// clang-format off
 #define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE)                                   \
  do {                                                                         \
    int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y];                          \
    int temp0, temp1, temp2, temp3;                                            \
-  __asm__ volatile (                                                           \
+    __asm__ volatile(                                                          \
      "replv.ph        %[leftY_1], %[leftY_1]              \n\t"               \
      "ulw             %[temp0],   0(%[top])               \n\t"               \
      "ulw             %[temp1],   4(%[top])               \n\t"               \
@@ -443,17 +452,20 @@ HORIZONTAL_PRED(dst, left, 16)
      : [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST))                  \
      : "memory"                                                               \
    );                                                                         \
-} while (0)
+  } while (0)
 // clang-format on
-#define CLIP_TO_DST(DST, LEFT, TOP, SIZE) do {                                 \
+#define CLIP_TO_DST(DST, LEFT, TOP, SIZE)                    \
  do {                                                       \
    int y;                                                   \
    const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1]; \
    for (y = 0; y < (SIZE); ++y) {                           \
      CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE));          \
      (DST) += BPS;                                          \
    }                                                        \
-} while (0)
+  } while (0)
 // clang-format off
 #define TRUE_MOTION(DST, LEFT, TOP, SIZE)                                      \
 static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST),         \
                                         const uint8_t* WEBP_RESTRICT (LEFT),  \
@@ -476,6 +488,7 @@ static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST),         \
    }                                                                          \
  }                                                                            \
 }
 // clang-format on
 TRUE_MOTION(dst, left, top, 8)
 TRUE_MOTION(dst, left, top, 16)
@@ -586,11 +599,10 @@ static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
      "4:                                          \n\t"
      "li          %[DC], 0x80                   \n\t"
      "5:                                          \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [DC] "=&r"(DC),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
+        [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [DC1] "=&r"(DC1)
-    : [left]"r"(left), [top]"r"(top)
+      : [left] "r"(left), [top] "r"(top)
-    : "memory"
+      : "memory");
  );
  FILL_8_OR_16(dst, DC, 8);
 }
@@ -619,7 +631,7 @@ static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
  const int c35 = 0xff00ff;
-  __asm__ volatile (
+  __asm__ volatile(
    "lbu              %[temp1],  0(%[top])                     \n\t"
    "lbu              %[a10],    1(%[top])                     \n\t"
    "lbu              %[temp2],  2(%[top])                     \n\t"
@@ -790,7 +802,7 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   -4(%[top])              \n\t"
    "ulw              %[temp1],   0(%[top])               \n\t"
    "preceu.ph.qbl    %[temp2],   %[temp0]                \n\t"
@@ -887,7 +899,7 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   0(%[top])               \n\t"
    "ulw              %[temp1],   4(%[top])               \n\t"
    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
@@ -936,7 +948,7 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   -5(%[top])              \n\t"
    "ulw              %[temp1],   -1(%[top])              \n\t"
    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
@@ -983,7 +995,7 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw             %[temp0],   -5(%[top])              \n\t"
    "preceu.ph.qbl   %[temp1],   %[temp0]                \n\t"
    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
@@ -1071,6 +1083,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
 #if !defined(WORK_AROUND_GCC)
 // clang-format off
 #define GET_SSE_INNER(A)                                                  \
  "lw               %[temp0],    " #A "(%[a])                  \n\t"      \
  "lw               %[temp1],    " #A "(%[b])                  \n\t"      \
@@ -1082,6 +1095,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
  "subq.ph          %[temp0],    %[temp0],    %[temp1]         \n\t"      \
  "dpa.w.ph         $ac0,        %[temp2],    %[temp2]         \n\t"      \
  "dpa.w.ph         $ac0,        %[temp0],    %[temp0]         \n\t"
 // clang-format on
 #define GET_SSE(A, B, C, D) \
  GET_SSE_INNER(A)          \
@@ -1093,30 +1107,29 @@ static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                              const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
+  __asm__ volatile(
-    "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"      //
-    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)      //
-    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)      //
-    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)      //
-    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)      //
-    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)      //
-    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)      //
-    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)      //
-    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)      //
-    GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
+      GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)      //
-    GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
+      GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)      //
-    GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+      GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)  //
-    GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+      GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)  //
-    GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+      GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)  //
-    GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+      GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)  //
-    GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+      GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)  //
-    GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+      GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)  //
      "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -1124,22 +1137,21 @@ static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                             const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
+  __asm__ volatile(
-    "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"  //
-    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)  //
-    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)  //
-    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)  //
-    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)  //
-    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)  //
-    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)  //
-    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)  //
-    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)  //
      "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -1147,18 +1159,17 @@ static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
+  __asm__ volatile(
-    "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"  //
-    GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)       //
-    GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)       //
-    GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)       //
-    GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)       //
      "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -1166,15 +1177,14 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
+  __asm__ volatile(
-    "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"  //
-    GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+      GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)               //
      "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
+      : [a] "r"(a), [b] "r"(b)
-    : "memory", "hi", "lo"
+      : "memory", "hi", "lo");
  );
  return count;
 }
@@ -1200,6 +1210,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
 // K - offset in bytes (kZigzag[n] * 4)
 // N - offset in bytes (n * 2)
 // N1 - offset in bytes ((n + 1) * 2)
 // clang-format off
 #define QUANTIZE_ONE(J, K, N, N1)                                         \
  "ulw         %[temp1],     " #J "(%[ppin])                 \n\t"        \
  "ulw         %[temp2],     " #J "(%[ppsharpen])            \n\t"        \
@@ -1285,10 +1296,11 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
  "sh          $0,           " #N1 "(%[pout])                \n\t"        \
  "usw         $0,           " #J "(%[ppin])                 \n\t"        \
 "3:                                                          \n\t"
 // clang-format on
 static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
                                   const VP8Matrix* WEBP_RESTRICT const mtx) {
-  int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
  int sign, coeff, level;
  int max_level = MAX_LEVEL;
  int max_level1 = max_level << 16 | max_level;
@@ -1302,27 +1314,24 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
  const uint16_t* ppiq = &mtx->iq[0];
  const uint32_t* ppbias = &mtx->bias[0];
-  __asm__ volatile (
+  __asm__ volatile(
-    QUANTIZE_ONE( 0,  0,  0,  2)
+      QUANTIZE_ONE(0, 0, 0, 2)      //
-    QUANTIZE_ONE( 4,  8, 10, 12)
+      QUANTIZE_ONE(4, 8, 10, 12)    //
-    QUANTIZE_ONE( 8, 16,  4,  8)
+      QUANTIZE_ONE(8, 16, 4, 8)     //
-    QUANTIZE_ONE(12, 24, 14, 24)
+      QUANTIZE_ONE(12, 24, 14, 24)  //
-    QUANTIZE_ONE(16, 32,  6, 16)
+      QUANTIZE_ONE(16, 32, 6, 16)   //
-    QUANTIZE_ONE(20, 40, 22, 26)
+      QUANTIZE_ONE(20, 40, 22, 26)  //
-    QUANTIZE_ONE(24, 48, 18, 20)
+      QUANTIZE_ONE(24, 48, 18, 20)  //
-    QUANTIZE_ONE(28, 56, 28, 30)
+      QUANTIZE_ONE(28, 56, 28, 30)  //
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level),
-      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
+        [temp6] "=&r"(temp6), [ret] "+&r"(ret)
-      [level]"=&r"(level), [temp6]"=&r"(temp6), [ret]"+&r"(ret)
+      : [ppin] "r"(ppin), [pout] "r"(pout), [max_level1] "r"(max_level1),
-    : [ppin]"r"(ppin), [pout]"r"(pout), [max_level1]"r"(max_level1),
+        [ppiq] "r"(ppiq), [max_level] "r"(max_level), [ppbias] "r"(ppbias),
-      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
+        [ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
-      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
+      : "memory", "hi", "lo");
      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
    : "memory", "hi", "lo"
  );
  return (ret != 0);
 }
@@ -1341,6 +1350,7 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
 // temp0..temp7 holds tmp[0]..tmp[15]
 // A, B, C, D - offset in bytes to load from in buffer
 // TEMP0, TEMP1 - registers for corresponding tmp elements
 // clang-format off
 #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1)                          \
  "lh              %[" #TEMP0 "],  " #A "(%[in])            \n\t"              \
  "lh              %[" #TEMP1 "],  " #B "(%[in])            \n\t"              \
@@ -1373,26 +1383,26 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
  "usw             %[" #TEMP2 "],  " #B "(%[out])                 \n\t"        \
  "usw             %[" #TEMP4 "],  " #C "(%[out])                 \n\t"        \
  "usw             %[" #TEMP6 "],  " #D "(%[out])                 \n\t"
 // clang-format on
 static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
                                    int16_t* WEBP_RESTRICT out) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
-    HORIZONTAL_PASS_WHT(  0,  32,  64,  96, temp0, temp1)
+      HORIZONTAL_PASS_WHT(0, 32, 64, 96, temp0, temp1)              //
-    HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)
+      HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)         //
-    HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)
+      HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)         //
-    HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)
+      HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)         //
-    VERTICAL_PASS_WHT(0,  8, 16, 24, temp0, temp2, temp4, temp6)
+      VERTICAL_PASS_WHT(0, 8, 16, 24, temp0, temp2, temp4, temp6)   //
-    VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)
+      VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)  //
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
-      [temp9]"=&r"(temp9)
+        [temp9] "=&r"(temp9)
-    : [in]"r"(in), [out]"r"(out)
+      : [in] "r"(in), [out] "r"(out)
-    : "memory"
+      : "memory");
  );
 }
 #undef VERTICAL_PASS_WHT
@@ -1401,6 +1411,7 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
 // macro for converting coefficients to bin
 // convert 8 coeffs at time
 // A, B, C, D - offsets in bytes to load from out buffer
 // clang-format off
 #define CONVERT_COEFFS_TO_BIN(A, B, C, D)                                      \
  "ulw        %[temp0],  " #A "(%[out])                \n\t"                   \
  "ulw        %[temp1],  " #B "(%[out])                \n\t"                   \
@@ -1466,12 +1477,13 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
  "lw         %[temp8],  0(%[temp3])                   \n\t"                   \
  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
  "sw         %[temp8],  0(%[temp3])                   \n\t"
 // clang-format on
 static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
                                       int start_block, int end_block,
                                       VP8Histogram* const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
@@ -1480,15 +1492,14 @@ static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
    // Convert coefficients to bin.
-    __asm__ volatile (
+    __asm__ volatile(
-      CONVERT_COEFFS_TO_BIN( 0,  4,  8, 12)
+        CONVERT_COEFFS_TO_BIN(0, 4, 8, 12)     //
-      CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)
+        CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)  //
-      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
-        [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+          [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
-        [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+          [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
-      : [dist]"r"(distribution), [out]"r"(out), [max_coeff]"r"(max_coeff)
+        : [dist] "r"(distribution), [out] "r"(out), [max_coeff] "r"(max_coeff)
-      : "memory"
+        : "memory");
    );
  }
  VP8SetHistogramData(distribution, histo);
 }
--- a/src/dsp/enc_msa.c
+++ b/src/dsp/enc_msa.c
@@ -16,13 +16,15 @@
 #if defined(WEBP_USE_MSA)
 #include <stdlib.h>
 #include "src/dsp/msa_macro.h"
 #include "src/enc/vp8i_enc.h"
 //------------------------------------------------------------------------------
 // Transforms
-#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
+#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
  do {                                                           \
    v4i32 a1_m, b1_m, c1_m, d1_m;                                \
    const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);         \
    const v4i32 sinpi8sqrt2 = __msa_fill_w(35468);               \
@@ -39,7 +41,7 @@
    d_tmp1_m = d_tmp1_m + in1;                                   \
    d1_m = d_tmp1_m + d_tmp2_m;                                  \
    BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
-} while (0)
+  } while (0)
 static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
                                      const int16_t* WEBP_RESTRICT in,
@@ -48,7 +50,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
  v16i8 dest0, dest1, dest2, dest3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  LD_SH2(in, 8, input0, input1);
  UNPCK_SH_SW(input0, in0, in1);
@@ -59,10 +61,10 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  LD_SB4(ref, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
-             res0, res1, res2, res3);
+             res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
-             res0, res1, res2, res3);
+             res3);
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -86,13 +88,13 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
  uint32_t in0, in1, in2, in3;
  v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  v8i16 t0, t1, t2, t3;
-  v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
+  v16u8 srcl0, srcl1, src0 = {0}, src1 = {0};
-  const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
+  const v8i16 mask0 = {0, 4, 8, 12, 1, 5, 9, 13};
-  const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask1 = {3, 7, 11, 15, 2, 6, 10, 14};
-  const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
+  const v8i16 mask2 = {4, 0, 5, 1, 6, 2, 7, 3};
-  const v8i16 mask3 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+  const v8i16 mask3 = {0, 4, 1, 5, 2, 6, 3, 7};
-  const v8i16 cnst0 = { 2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352 };
+  const v8i16 cnst0 = {2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352};
-  const v8i16 cnst1 = { 5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217 };
+  const v8i16 cnst1 = {5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217};
  LW4(src, BPS, in0, in1, in2, in3);
  INSERT_W4_UB(in0, in1, in2, in3, src0);
@@ -136,29 +138,29 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
 static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
                              int16_t* WEBP_RESTRICT out) {
-  v8i16 in0 = { 0 };
+  v8i16 in0 = {0};
-  v8i16 in1 = { 0 };
+  v8i16 in1 = {0};
  v8i16 tmp0, tmp1, tmp2, tmp3;
  v8i16 out0, out1;
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
-  in0 = __msa_insert_h(in0, 0, in[  0]);
+  in0 = __msa_insert_h(in0, 0, in[0]);
-  in0 = __msa_insert_h(in0, 1, in[ 64]);
+  in0 = __msa_insert_h(in0, 1, in[64]);
  in0 = __msa_insert_h(in0, 2, in[128]);
  in0 = __msa_insert_h(in0, 3, in[192]);
-  in0 = __msa_insert_h(in0, 4, in[ 16]);
+  in0 = __msa_insert_h(in0, 4, in[16]);
-  in0 = __msa_insert_h(in0, 5, in[ 80]);
+  in0 = __msa_insert_h(in0, 5, in[80]);
  in0 = __msa_insert_h(in0, 6, in[144]);
  in0 = __msa_insert_h(in0, 7, in[208]);
-  in1 = __msa_insert_h(in1, 0, in[ 48]);
+  in1 = __msa_insert_h(in1, 0, in[48]);
  in1 = __msa_insert_h(in1, 1, in[112]);
  in1 = __msa_insert_h(in1, 2, in[176]);
  in1 = __msa_insert_h(in1, 3, in[240]);
-  in1 = __msa_insert_h(in1, 4, in[ 32]);
+  in1 = __msa_insert_h(in1, 4, in[32]);
-  in1 = __msa_insert_h(in1, 5, in[ 96]);
+  in1 = __msa_insert_h(in1, 5, in[96]);
  in1 = __msa_insert_h(in1, 6, in[160]);
  in1 = __msa_insert_h(in1, 7, in[224]);
  ADDSUB2(in0, in1, tmp0, tmp1);
@@ -176,14 +178,14 @@ static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
                          const uint16_t* WEBP_RESTRICT w) {
  int sum;
  uint32_t in0_m, in1_m, in2_m, in3_m;
-  v16i8 src0 = { 0 };
+  v16i8 src0 = {0};
  v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
  v4i32 dst0, dst1;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
  LW4(in, BPS, in0_m, in1_m, in2_m, in3_m);
  INSERT_W4_SB(in0_m, in1_m, in2_m, in3_m, src0);
@@ -233,14 +235,14 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
                                 int start_block, int end_block,
                                 VP8Histogram* const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
    {
      int k;
      v8i16 coeff0, coeff1;
-      const v8i16 zero = { 0 };
+      const v8i16 zero = {0};
      const v8i16 max_coeff_thr = __msa_ldi_h(MAX_COEFF_THRESH);
      LD_SH2(&out[0], 8, coeff0, coeff1);
      coeff0 = __msa_add_a_h(coeff0, zero);
@@ -269,7 +271,7 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
 // vertical
 static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A1 = { 0 };
+  const v16u8 A1 = {0};
  const uint64_t val_m = LD(top - 1);
  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
  const v16u8 B = SLDI_UB(A, A, 1);
@@ -307,7 +309,7 @@ static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
 static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A2 = { 0 };
+  const v16u8 A2 = {0};
  const uint64_t val_m = LD(top - 5);
  const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
  const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
@@ -328,7 +330,7 @@ static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
 static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A1 = { 0 };
+  const v16u8 A1 = {0};
  const uint64_t val_m = LD(top);
  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
  const v16u8 B = SLDI_UB(A, A, 1);
@@ -403,8 +405,7 @@ static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
@@ -431,7 +432,7 @@ static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
 static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
  const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
  const v8i16 L1 = (v8i16)__msa_fill_h(top[-3]);
@@ -466,10 +467,11 @@ static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
 // luma 16x16 prediction
-#define STORE16x16(out, dst) do {                                        \
+#define STORE16x16(out, dst)                                            \
  do {                                                                  \
    ST_UB8(out, out, out, out, out, out, out, out, dst + 0 * BPS, BPS); \
    ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
-} while (0)
+  } while (0)
 static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
                                          const uint8_t* WEBP_RESTRICT top) {
@@ -508,7 +510,7 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
    if (top != NULL) {
      int j;
      v8i16 d1, d2;
-      const v16i8 zero = { 0 };
+      const v16i8 zero = {0};
      const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
      const v16u8 T = LD_UB(top);
      ILVRL_B2_SH(zero, T, d1, d2);
@@ -582,7 +584,8 @@ static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
 // Chroma 8x8 prediction
-#define CALC_DC8(in, out) do {                              \
+#define CALC_DC8(in, out)                                    \
  do {                                                       \
    const v8u16 temp0 = __msa_hadd_u_h(in, in);              \
    const v4u32 temp1 = __msa_hadd_u_w(temp0, temp0);        \
    const v2i64 temp2 = (v2i64)__msa_hadd_u_d(temp1, temp1); \
@@ -591,12 +594,13 @@ static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
    const v16i8 temp5 = (v16i8)__msa_srari_d(temp4, 4);      \
    const v2i64 temp6 = (v2i64)__msa_splati_b(temp5, 0);     \
    out = __msa_copy_s_d(temp6, 0);                          \
-} while (0)
+  } while (0)
-#define STORE8x8(out, dst) do {                 \
+#define STORE8x8(out, dst)                       \
  do {                                           \
    SD4(out, out, out, out, dst + 0 * BPS, BPS); \
    SD4(out, out, out, out, dst + 4 * BPS, BPS); \
-} while (0)
+  } while (0)
 static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
                                        const uint8_t* WEBP_RESTRICT top) {
@@ -640,7 +644,7 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
      int j;
      const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
      const v16u8 T1 = LD_UB(top);
-      const v16i8 zero = { 0 };
+      const v16i8 zero = {0};
      const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
      const v8i16 d = T - TL;
      for (j = 0; j < 8; j += 4) {
@@ -677,7 +681,7 @@ static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
                                  const uint8_t* WEBP_RESTRICT left,
                                  const uint8_t* WEBP_RESTRICT top) {
  uint64_t out;
-  v16u8 src = { 0 };
+  v16u8 src = {0};
  if (top != NULL && left != NULL) {
    const uint64_t left_m = LD(left);
    const uint64_t top_m = LD(top);
@@ -719,7 +723,8 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
 //------------------------------------------------------------------------------
 // Metric
-#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
+#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
  do {                                                               \
    v16u8 tmp0, tmp1;                                                \
    v8i16 tmp2, tmp3;                                                \
    ILVRL_B2_UB(in0, in1, tmp0, tmp1);                               \
@@ -728,9 +733,10 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
    ILVRL_B2_UB(in2, in3, tmp0, tmp1);                               \
    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                             \
    DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                 \
-} while (0)
+  } while (0)
-#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
+#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
  do {                                                                \
    v16u8 tmp0, tmp1;                                                 \
    v8i16 tmp2, tmp3;                                                 \
    ILVRL_B2_UB(in0, in1, tmp0, tmp1);                                \
@@ -739,7 +745,7 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
    ILVRL_B2_UB(in2, in3, tmp0, tmp1);                                \
    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                              \
    DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                 \
-} while (0)
+  } while (0)
 static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
                        const uint8_t* WEBP_RESTRICT b) {
@@ -814,7 +820,7 @@ static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
                      const uint8_t* WEBP_RESTRICT b) {
  uint32_t sum = 0;
  uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
-  v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
+  v16u8 src = {0}, ref = {0}, tmp0, tmp1;
  v8i16 diff0, diff1;
  v4i32 out0, out1;
@@ -839,9 +845,9 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  v8i16 in0, in1, sh0, sh1, out0, out1;
  v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
  v4i32 s0, s1, s2, s3, b0, b1, b2, b3, t0, t1, t2, t3;
-  const v8i16 zero = { 0 };
+  const v8i16 zero = {0};
-  const v8i16 zigzag0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
+  const v8i16 zigzag0 = {0, 1, 4, 8, 5, 2, 3, 6};
-  const v8i16 zigzag1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
+  const v8i16 zigzag1 = {9, 12, 13, 10, 7, 11, 14, 15};
  const v8i16 maxlevel = __msa_fill_h(MAX_LEVEL);
  LD_SH2(&in[0], 8, in0, in1);
--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@@ -136,9 +136,9 @@ static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
                               const int16_t* WEBP_RESTRICT in,
                               uint8_t* WEBP_RESTRICT dst) {
  const int kBPS = BPS;
-  const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
+  const int16_t kC1C2[] = {kC1, kC2, 0, 0};
-  __asm__ volatile (
+  __asm__ volatile(
      "vld1.16         {q1, q2}, [%[in]]           \n"
      "vld1.16         {d0}, [%[kC1C2]]            \n"
@@ -272,24 +272,19 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) {
 #if defined(WEBP_USE_INTRINSICS)
-static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
+static WEBP_INLINE void Transpose4x4_S16_NEON(
-                                              const int16x4_t B,
+    const int16x4_t A, const int16x4_t B, const int16x4_t C, const int16x4_t D,
-                                              const int16x4_t C,
+    int16x8_t* const out01, int16x8_t* const out32) {
                                              const int16x4_t D,
                                              int16x8_t* const out01,
                                              int16x8_t* const out32) {
  const int16x4x2_t AB = vtrn_s16(A, B);
  const int16x4x2_t CD = vtrn_s16(C, D);
  const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
                                     vreinterpret_s32_s16(CD.val[0]));
  const int32x2x2_t tmp13 = vtrn_s32(vreinterpret_s32_s16(AB.val[1]),
                                     vreinterpret_s32_s16(CD.val[1]));
-  *out01 = vreinterpretq_s16_s64(
+  *out01 = vreinterpretq_s16_s64(vcombine_s64(
-      vcombine_s64(vreinterpret_s64_s32(tmp02.val[0]),
+      vreinterpret_s64_s32(tmp02.val[0]), vreinterpret_s64_s32(tmp13.val[0])));
-                   vreinterpret_s64_s32(tmp13.val[0])));
+  *out32 = vreinterpretq_s16_s64(vcombine_s64(
-  *out32 = vreinterpretq_s16_s64(
+      vreinterpret_s64_s32(tmp13.val[1]), vreinterpret_s64_s32(tmp02.val[1])));
      vcombine_s64(vreinterpret_s64_s32(tmp13.val[1]),
                   vreinterpret_s64_s32(tmp02.val[1])));
 }
 static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
@@ -318,10 +313,10 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);  // d0+d3 | d1+d2   (=a0|a1)
    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);  // d0-d3 | d1-d2   (=a3|a2)
    const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3);
-    const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2),
+    const int16x4_t tmp0 =
-                                    vget_high_s16(a0a1_2));
+        vadd_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
-    const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2),
+    const int16x4_t tmp2 =
-                                    vget_high_s16(a0a1_2));
+        vsub_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
@@ -358,15 +353,11 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
 #else
 // adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
-static const int16_t kCoeff16[] = {
+static const int16_t kCoeff16[] = {5352, 5352, 5352, 5352,
-  5352,  5352,  5352, 5352, 2217,  2217,  2217, 2217
+                                   2217, 2217, 2217, 2217};
-};
+static const int32_t kCoeff32[] = {1812,  1812,  1812,  1812,  937,   937,
-static const int32_t kCoeff32[] = {
+                                   937,   937,   12000, 12000, 12000, 12000,
-   1812,  1812,  1812,  1812,
+                                   51000, 51000, 51000, 51000};
    937,   937,   937,   937,
  12000, 12000, 12000, 12000,
  51000, 51000, 51000, 51000
 };
 static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
                            const uint8_t* WEBP_RESTRICT ref,
@@ -377,7 +368,7 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
  const int16_t* coeff16 = kCoeff16;
  const int32_t* coeff32 = kCoeff32;
-  __asm__ volatile (
+  __asm__ volatile(
      // load src into q4, q5 in high half
      "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
      "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
@@ -420,9 +411,11 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
      "vsub.s16        d7, d0, d3               \n"  // a3 = d0 - d3
      "vadd.s16        d0, d4, d5               \n"  // a0 + a1
-    "vshl.s16        d0, d0, #3               \n" // temp[0+i*4] = (a0+a1) << 3
+      "vshl.s16        d0, d0, #3               \n"  // temp[0+i*4] = (a0+a1) <<
                                                     // 3
      "vsub.s16        d2, d4, d5               \n"  // a0 - a1
-    "vshl.s16        d2, d2, #3               \n" // (temp[2+i*4] = (a0-a1) << 3
+      "vshl.s16        d2, d2, #3               \n"  // (temp[2+i*4] = (a0-a1)
                                                     // << 3
      "vmlal.s16       q9, d7, d16              \n"  // a3*5352 + 1812
      "vmlal.s16       q10, d7, d17             \n"  // a3*2217 + 937
@@ -460,8 +453,10 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
      "vshr.s16        d0, d0, #4               \n"
      "vshr.s16        d2, d2, #4               \n"
-    "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
+      "vmlal.s16       q11, d6, d17             \n"  // c1*2217 + d1*5352 +
-    "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
+                                                     // 12000
      "vmlsl.s16       q12, d6, d16             \n"  // d1*2217 - c1*5352 +
                                                     // 51000
      "vmvn            d4, d4                   \n"  // !(d1 == 0)
      // op[4] = (c1*2217 + d1*5352 + 12000)>>16
@@ -484,10 +479,11 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
 #endif
-#define LOAD_LANE_16b(VALUE, LANE) do {             \
+#define LOAD_LANE_16b(VALUE, LANE)                 \
  do {                                             \
    (VALUE) = vld1_lane_s16(src, (VALUE), (LANE)); \
    src += stride;                                 \
-} while (0)
+  } while (0)
 static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
                               int16_t* WEBP_RESTRICT out) {
@@ -586,8 +582,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
  return q4_in;
 }
-static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
+static WEBP_INLINE int16x8x4_t
-    const int16x8x4_t q4_in) {
+DistoHorizontalPass_NEON(const int16x8x4_t q4_in) {
  // {a0, a1} = {in[0] + in[2], in[1] + in[3]}
  // {a3, a2} = {in[0] - in[2], in[1] - in[3]}
  const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
@@ -599,26 +595,24 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
  // tmp[1] = a3 + a2
  // tmp[2] = a3 - a2
  // tmp[3] = a0 - a1
-  INIT_VECTOR4(q4_out,
+  INIT_VECTOR4(q4_out, vabsq_s16(vaddq_s16(q_a0, q_a1)),
-               vabsq_s16(vaddq_s16(q_a0, q_a1)),
+               vabsq_s16(vaddq_s16(q_a3, q_a2)), vabdq_s16(q_a3, q_a2),
-               vabsq_s16(vaddq_s16(q_a3, q_a2)),
+               vabdq_s16(q_a0, q_a1));
               vabdq_s16(q_a3, q_a2), vabdq_s16(q_a0, q_a1));
  return q4_out;
 }
 static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
-  const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
+  const int16x8_t q_a0 =
-                                                        q4_in.val[2]));
+      vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0], q4_in.val[2]));
-  const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
+  const int16x8_t q_a1 =
-                                                        q4_in.val[3]));
+      vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1], q4_in.val[3]));
-  const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1],
+  const int16x8_t q_a2 =
-                                                        q4_in.val[3]));
+      vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1], q4_in.val[3]));
-  const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0],
+  const int16x8_t q_a3 =
-                                                        q4_in.val[2]));
+      vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0], q4_in.val[2]));
  int16x8x4_t q4_out;
-  INIT_VECTOR4(q4_out,
+  INIT_VECTOR4(q4_out, vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
               vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
               vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
  return q4_out;
 }
@@ -627,8 +621,7 @@ static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
  const uint16x8_t q_w07 = vld1q_u16(&w[0]);
  const uint16x8_t q_w8f = vld1q_u16(&w[8]);
  int16x4x4_t d4_w;
-  INIT_VECTOR4(d4_w,
+  INIT_VECTOR4(d4_w, vget_low_s16(vreinterpretq_s16_u16(q_w07)),
               vget_low_s16(vreinterpretq_s16_u16(q_w07)),
               vget_high_s16(vreinterpretq_s16_u16(q_w07)),
               vget_low_s16(vreinterpretq_s16_u16(q_w8f)),
               vget_high_s16(vreinterpretq_s16_u16(q_w8f)));
@@ -683,8 +676,7 @@ static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
  LOAD_LANE_32b(b + 1 * BPS, d_in_ab_4567, 1);
  LOAD_LANE_32b(b + 2 * BPS, d_in_ab_89ab, 1);
  LOAD_LANE_32b(b + 3 * BPS, d_in_ab_cdef, 1);
-  INIT_VECTOR4(d4_in,
+  INIT_VECTOR4(d4_in, vreinterpret_u8_u32(d_in_ab_0123),
               vreinterpret_u8_u32(d_in_ab_0123),
               vreinterpret_u8_u32(d_in_ab_4567),
               vreinterpret_u8_u32(d_in_ab_89ab),
               vreinterpret_u8_u32(d_in_ab_cdef));
@@ -729,7 +721,7 @@ static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
                                  VP8Histogram* WEBP_RESTRICT const histo) {
  const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
@@ -762,10 +754,10 @@ static WEBP_INLINE void AccumulateSSE16_NEON(
  const uint8x16_t a0 = vld1q_u8(a);
  const uint8x16_t b0 = vld1q_u8(b);
  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
+  const uint16x8_t prod1 =
-                                    vget_low_u8(abs_diff));
+      vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
-  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
+  const uint16x8_t prod2 =
-                                    vget_high_u8(abs_diff));
+      vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
  /* pair-wise adds and widen */
  const uint32x4_t sum1 = vpaddlq_u16(prod1);
  const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -823,10 +815,10 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
  const uint8x16_t a0 = Load4x4_NEON(a);
  const uint8x16_t b0 = Load4x4_NEON(b);
  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
+  const uint16x8_t prod1 =
-                                    vget_low_u8(abs_diff));
+      vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
-  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
+  const uint16x8_t prod2 =
-                                    vget_high_u8(abs_diff));
+      vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
  /* pair-wise adds and widen */
  const uint32x4_t sum1 = vpaddlq_u16(prod1);
  const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -855,8 +847,8 @@ static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
  const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
  const uint32x4_t m2 = vhaddq_u32(m0, bias0);
  const uint32x4_t m3 = vhaddq_u32(m1, bias1);  // (coeff * iQ + bias) >> 1
-  const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16),
+  const uint16x8_t c0 =
-                                     vshrn_n_u32(m3, 16));   // QFIX=17 = 16+1
+      vcombine_u16(vshrn_n_u32(m2, 16), vshrn_n_u32(m3, 16));  // QFIX=17 = 16+1
  const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL));
  const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
  const int16x8_t c3 = vsubq_s16(c2, sign);  // restore sign
@@ -866,12 +858,10 @@ static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
  return c3;
 }
-static const uint8_t kShuffles[4][8] = {
+static const uint8_t kShuffles[4][8] = {{0, 1, 2, 3, 8, 9, 16, 17},
-  { 0,   1,  2,  3,  8,  9, 16, 17 },
+                                        {10, 11, 4, 5, 6, 7, 12, 13},
-  { 10, 11,  4,  5,  6,  7, 12, 13 },
+                                        {18, 19, 24, 25, 26, 27, 20, 21},
-  { 18, 19, 24, 25, 26, 27, 20, 21 },
+                                        {14, 15, 22, 23, 28, 29, 30, 31}};
  { 14, 15, 22, 23, 28, 29, 30, 31 }
 };
 static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
                              const VP8Matrix* WEBP_RESTRICT const mtx) {
@@ -880,24 +870,21 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
  uint8x8x4_t shuffles;
  // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
  // non-standard versions there.
-#if defined(__APPLE__) && WEBP_AARCH64 && \
+#if defined(__APPLE__) && WEBP_AARCH64 && defined(__apple_build_version__) && \
-    defined(__apple_build_version__) && (__apple_build_version__< 6020037)
+    (__apple_build_version__ < 6020037)
  uint8x16x2_t all_out;
  INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
-  INIT_VECTOR4(shuffles,
+  INIT_VECTOR4(shuffles, vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[2])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
 #else
  uint8x8x4_t all_out;
-  INIT_VECTOR4(all_out,
+  INIT_VECTOR4(all_out, vreinterpret_u8_s16(vget_low_s16(out0)),
               vreinterpret_u8_s16(vget_low_s16(out0)),
               vreinterpret_u8_s16(vget_high_s16(out0)),
               vreinterpret_u8_s16(vget_low_s16(out1)),
               vreinterpret_u8_s16(vget_high_s16(out1)));
-  INIT_VECTOR4(shuffles,
+  INIT_VECTOR4(shuffles, vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[2])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
@@ -960,8 +947,7 @@ static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
  return vld1q_u8_x4(ptr);
 #else
  uint8x16x4_t res;
-  INIT_VECTOR4(res,
+  INIT_VECTOR4(res, vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
               vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
               vld1q_u8(ptr + 2 * 16), vld1q_u8(ptr + 3 * 16));
  return res;
 #endif
@@ -976,22 +962,19 @@ static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
      0, 0,  1,  2,  3, 4,  5,  6,  7, 8,  9, 10, 11, 12, 12, 12,
      3, 3,  3,  3,  2, 2,  2,  2,  1, 1,  1, 1,  0,  0,  0,  0,
      4, 20, 21, 22, 3, 18, 2,  17, 3, 19, 4, 20, 2,  17, 1,  16,
-    2, 18,  3, 19,  1, 16, 31, 31,  1, 17,  2, 18, 31, 31, 31, 31
+      2, 18, 3,  19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31};
  };
  static const uint8_t kLookupTbl2[64] = {
      20, 21, 22, 23, 5,  6,  7,  8,  22, 23, 24, 25, 6,  7,  8,  9,
      19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
      18, 19, 20, 21, 19, 5,  6,  7,  24, 25, 26, 27, 7,  8,  9,  26,
-    17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
+      17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27};
  };
  static const uint8_t kLookupTbl3[64] = {
      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
-    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 16, 16, 16, 16
+      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16};
  };
  const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
  const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
--- a/src/dsp/enc_sse2.c
+++ b/src/dsp/enc_sse2.c
@@ -14,9 +14,8 @@
 #include "src/dsp/dsp.h"
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
 #include <assert.h>
 #include <emmintrin.h>
 #include <stdlib.h>  // for abs()
 #include <string.h>
@@ -50,10 +49,10 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
  //   variable and the multiplication of that variable by the associated
  //   constant:
  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
-  const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
+  const __m128i k1k2 =
-                                     20091, 20091, 20091, 20091);
+      _mm_set_epi16(-30068, -30068, -30068, -30068, 20091, 20091, 20091, 20091);
-  const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
+  const __m128i k2k1 =
-                                     -30068, -30068, -30068, -30068);
+      _mm_set_epi16(20091, 20091, 20091, 20091, -30068, -30068, -30068, -30068);
  const __m128i zero = _mm_setzero_si128();
  const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
  __m128i T01, T23;
@@ -172,7 +171,7 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
    // Unsigned saturate to 8b.
    ref0123 = _mm_packus_epi16(ref01, ref23);
-    _mm_storeu_si128((__m128i *)buf, ref0123);
+    _mm_storeu_si128((__m128i*)buf, ref0123);
    // Store four bytes/pixels per line.
    WebPInt32ToMem(&dst[0 * BPS], buf[0]);
@@ -325,8 +324,7 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
 // Does one or two inverse transforms.
 static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
                            const int16_t* WEBP_RESTRICT in,
-                            uint8_t* WEBP_RESTRICT dst,
+                            uint8_t* WEBP_RESTRICT dst, int do_two) {
                            int do_two) {
  if (do_two) {
    ITransform_Two_SSE2(ref, in, dst);
  } else {
@@ -336,17 +334,16 @@ static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
 static void FTransformPass1_SSE2(const __m128i* const in01,
                                 const __m128i* const in23,
-                                 __m128i* const out01,
+                                 __m128i* const out01, __m128i* const out32) {
                                 __m128i* const out32) {
  const __m128i k937 = _mm_set1_epi32(937);
  const __m128i k1812 = _mm_set1_epi32(1812);
  const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
  const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
-  const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
+  const __m128i k5352_2217p =
-                                            2217, 5352, 2217, 5352);
+      _mm_set_epi16(2217, 5352, 2217, 5352, 2217, 5352, 2217, 5352);
-  const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
+  const __m128i k5352_2217m =
-                                            -5352, 2217, -5352, 2217);
+      _mm_set_epi16(-5352, 2217, -5352, 2217, -5352, 2217, -5352, 2217);
  // *in01 = 00 01 10 11 02 03 12 13
  // *in23 = 20 21 30 31 22 23 32 33
@@ -385,10 +382,10 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
                                 int16_t* WEBP_RESTRICT out) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i seven = _mm_set1_epi16(7);
-  const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
+  const __m128i k5352_2217 =
-                                           5352,  2217, 5352,  2217);
+      _mm_set_epi16(5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217);
-  const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
+  const __m128i k2217_5352 =
-                                           2217, -5352, 2217, -5352);
+      _mm_set_epi16(2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352);
  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
  const __m128i k51000 = _mm_set1_epi32(51000);
@@ -586,7 +583,7 @@ static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
  const __m128i zero = _mm_setzero_si128();
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;
@@ -927,7 +924,7 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -950,8 +947,8 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
@@ -978,8 +975,8 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
  const uint32_t extra_out =
      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
@@ -1000,7 +997,7 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
-  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -1018,8 +1015,7 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
@@ -1495,10 +1491,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
  {
    __m128i outZ0, outZ8;
    outZ0 = _mm_shufflehi_epi16(out0, _MM_SHUFFLE(2, 1, 3, 0));
-    outZ0 = _mm_shuffle_epi32  (outZ0, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ0 = _mm_shuffle_epi32(outZ0, _MM_SHUFFLE(3, 1, 2, 0));
    outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
    outZ8 = _mm_shufflelo_epi16(out8, _MM_SHUFFLE(3, 0, 2, 1));
-    outZ8 = _mm_shuffle_epi32  (outZ8, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ8 = _mm_shuffle_epi32(outZ8, _MM_SHUFFLE(3, 1, 2, 0));
    outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
    _mm_storeu_si128((__m128i*)&out[0], outZ0);
    _mm_storeu_si128((__m128i*)&out[8], outZ8);
--- a/src/dsp/enc_sse41.c
+++ b/src/dsp/enc_sse41.c
@@ -16,7 +16,6 @@
 #if defined(WEBP_USE_SSE41)
 #include <emmintrin.h>
 #include <smmintrin.h>
 #include <stdlib.h>  // for abs()
 #include "src/dsp/common_sse2.h"
@@ -33,7 +32,7 @@ static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
                                   VP8Histogram* WEBP_RESTRICT const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;
@@ -198,7 +197,7 @@ static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
 //
 // Generates a pshufb constant for shuffling 16b words.
-#define PSHUFB_CST(A,B,C,D,E,F,G,H) \
+#define PSHUFB_CST(A, B, C, D, E, F, G, H)                         \
  _mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, \
               2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, 2 * (E) + 0, \
               2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
--- a/src/dsp/filters.c
+++ b/src/dsp/filters.c
@@ -124,8 +124,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
    // leftmost pixel: predict from above.
    PredictLine_C(in, preds - stride, out, 1);
    for (w = 1; w < width; ++w) {
-      const int pred = GradientPredictor_C(preds[w - 1],
+      const int pred = GradientPredictor_C(preds[w - 1], preds[w - stride],
                                           preds[w - stride],
                                           preds[w - stride - 1]);
      out[w] = (uint8_t)(in[w] - pred);
    }
@@ -141,20 +140,20 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
 #if !WEBP_NEON_OMIT_C_CODE
-static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
+static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
-                               int width, int height, int stride,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  DoHorizontalFilter_C(data, width, height, stride, filtered_data);
 }
-static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
+static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
-                             int width, int height, int stride,
+                             int height, int stride,
                             uint8_t* WEBP_RESTRICT filtered_data) {
  DoVerticalFilter_C(data, width, height, stride, filtered_data);
 }
-static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
+static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
-                             int width, int height, int stride,
+                             int height, int stride,
                             uint8_t* WEBP_RESTRICT filtered_data) {
  DoGradientFilter_C(data, width, height, stride, filtered_data);
 }
@@ -162,8 +161,8 @@ static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
 //------------------------------------------------------------------------------
-static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
+static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in, uint8_t* out,
-                           uint8_t* out, int width) {
+                           int width) {
  (void)prev;
  if (out != in) memcpy(out, in, width * sizeof(*out));
 }
--- a/src/dsp/filters_mips_dsp_r2.c
+++ b/src/dsp/filters_mips_dsp_r2.c
@@ -16,11 +16,12 @@
 #if defined(WEBP_USE_MIPS_DSP_R2)
 #include "src/dsp/dsp.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include "src/dsp/dsp.h"
 //------------------------------------------------------------------------------
 // Helpful macro.
@@ -34,12 +35,14 @@
    assert(stride >= width); \
  } while (0)
-#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do {                        \
+// clang-format off
 #define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE)                             \
  do {                                                                         \
    const uint8_t* psrc = (uint8_t*)(SRC);                                     \
    uint8_t* pdst = (uint8_t*)(DST);                                           \
    const int ilength = (int)(LENGTH);                                         \
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;                       \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      ".set      push                                   \n\t"                  \
      ".set      noreorder                              \n\t"                  \
      "srl       %[temp0],    %[length],    2           \n\t"                  \
@@ -101,6 +104,7 @@
      : "memory"                                                               \
    );                                                                         \
  } while (0)
 // clang-format on
 static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
                                              uint8_t* WEBP_RESTRICT dst,
@@ -108,13 +112,15 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
  DO_PREDICT_LINE(src, dst, length, 0);
 }
-#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do {         \
+// clang-format off
 #define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE)              \
  do {                                                                         \
    const uint8_t* psrc = (uint8_t*)(SRC);                                     \
    const uint8_t* ppred = (uint8_t*)(PRED);                                   \
    uint8_t* pdst = (uint8_t*)(DST);                                           \
    const int ilength = (int)(LENGTH);                                         \
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;                \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      ".set      push                                   \n\t"                  \
      ".set      noreorder                              \n\t"                  \
      "srl       %[temp0],    %[length],    0x3         \n\t"                  \
@@ -167,9 +173,10 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
    );                                                                         \
  } while (0)
-#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) do {                             \
+#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST)                                  \
  do {                                                                         \
    int temp1, temp2, temp3;                                                   \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      "lbu       %[temp1],   0(%[src])               \n\t"                     \
      "lbu       %[temp2],   0(%[pred])              \n\t"                     \
      "subu      %[temp3],   %[temp1],   %[temp2]    \n\t"                     \
@@ -179,11 +186,13 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
      : "memory"                                                               \
    );                                                                         \
  } while (0)
 // clang-format on
 //------------------------------------------------------------------------------
 // Horizontal filter.
-#define FILTER_LINE_BY_LINE do {                                               \
+#define FILTER_LINE_BY_LINE                           \
  do {                                                \
    for (row = 1; row < height; ++row) {              \
      PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
      DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
@@ -221,7 +230,8 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
 //------------------------------------------------------------------------------
 // Vertical filter.
-#define FILTER_LINE_BY_LINE do {                                               \
+#define FILTER_LINE_BY_LINE                               \
  do {                                                    \
    for (row = 1; row < height; ++row) {                  \
      DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
      preds += stride;                                    \
@@ -260,26 +270,25 @@ static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
 static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
  int temp0;
-  __asm__ volatile (
+  __asm__ volatile(
      "addu             %[temp0],   %[a],       %[b]        \n\t"
      "subu             %[temp0],   %[temp0],   %[c]        \n\t"
      "shll_s.w         %[temp0],   %[temp0],   23          \n\t"
      "precrqu_s.qb.ph  %[temp0],   %[temp0],   $zero       \n\t"
      "srl              %[temp0],   %[temp0],   24          \n\t"
-    : [temp0]"=&r"(temp0)
+      : [temp0] "=&r"(temp0)
-    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+      : [a] "r"(a), [b] "r"(b), [c] "r"(c));
  );
  return temp0;
 }
-#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do {                             \
+#define FILTER_LINE_BY_LINE(PREDS, OPERATION)                        \
  do {                                                               \
    for (row = 1; row < height; ++row) {                             \
      int w;                                                         \
      PREDICT_LINE_ONE_PASS(in, PREDS - stride, out);                \
      for (w = 1; w < width; ++w) {                                  \
-        const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1],             \
+        const int pred = GradientPredictor_MIPSdspR2(                \
-                                                     PREDS[w - stride],        \
+            PREDS[w - 1], PREDS[w - stride], PREDS[w - stride - 1]); \
                                                     PREDS[w - stride - 1]);   \
        out[w] = in[w] OPERATION pred;                               \
      }                                                              \
      in += stride;                                                  \
--- a/src/dsp/filters_msa.c
+++ b/src/dsp/filters_msa.c
@@ -15,10 +15,10 @@
 #if defined(WEBP_USE_MSA)
 #include "src/dsp/msa_macro.h"
 #include <assert.h>
 #include "src/dsp/msa_macro.h"
 static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
                                            const uint8_t* pred,
                                            uint8_t* WEBP_RESTRICT dst,
@@ -70,8 +70,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
 //------------------------------------------------------------------------------
 // Horrizontal filter
-static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
+static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
-                                 int width, int height, int stride,
+                                 int height, int stride,
                                 uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* preds = data;
  const uint8_t* in = data;
@@ -105,7 +105,7 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
                                            uint8_t* WEBP_RESTRICT poutput,
                                            int stride, int size) {
  int w;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  while (size >= 16) {
    v16u8 pred0, dst0;
    v8i16 a0, a1, b0, b1, c0, c1;
@@ -133,9 +133,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
  }
 }
-
+static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
-static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
+                               int height, int stride,
                               int width, int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* in = data;
  const uint8_t* preds = data;
@@ -151,7 +150,7 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
  out += stride;
  // Filter line-by-line.
  while (row < height) {
-    out[0] = in[0] - preds[- stride];
+    out[0] = in[0] - preds[-stride];
    PredictLineGradient(preds + 1, in + 1, out + 1, stride, width - 1);
    ++row;
    preds += stride;
@@ -163,8 +162,8 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
 //------------------------------------------------------------------------------
 // Vertical filter
-static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
+static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
-                               int width, int height, int stride,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* in = data;
  const uint8_t* preds = data;
--- a/src/dsp/filters_neon.c
+++ b/src/dsp/filters_neon.c
@@ -16,6 +16,7 @@
 #if defined(WEBP_USE_NEON)
 #include <assert.h>
 #include "src/dsp/neon.h"
 //------------------------------------------------------------------------------
@@ -66,8 +67,9 @@ static void PredictLineLeft_NEON(const uint8_t* WEBP_RESTRICT src,
 //------------------------------------------------------------------------------
 // Horizontal filter.
-static WEBP_INLINE void DoHorizontalFilter_NEON(
+static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
-    const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
+                                                int width, int height,
                                                int stride,
                                                uint8_t* WEBP_RESTRICT out) {
  int row;
  DCHECK(in, out);
@@ -88,8 +90,8 @@ static WEBP_INLINE void DoHorizontalFilter_NEON(
  }
 }
-static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
+static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data, int width,
-                                  int width, int height, int stride,
+                                  int height, int stride,
                                  uint8_t* WEBP_RESTRICT filtered_data) {
  DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
 }
@@ -118,8 +120,8 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
  }
 }
-static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
+static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data, int width,
-                                int width, int height, int stride,
+                                int height, int stride,
                                uint8_t* WEBP_RESTRICT filtered_data) {
  DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
 }
@@ -172,8 +174,8 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
  }
 }
-static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data,
+static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data, int width,
-                                int width, int height, int stride,
+                                int height, int stride,
                                uint8_t* WEBP_RESTRICT filtered_data) {
  DoGradientFilter_NEON(data, width, height, stride, filtered_data);
 }
@@ -233,13 +235,14 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
 #endif
 #if (USE_GRADIENT_UNFILTER == 1)
-#define GRAD_PROCESS_LANE(L)  do {                                             \
+#define GRAD_PROCESS_LANE(L)                                                  \
  do {                                                                        \
    const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
    const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1));                    \
    const uint8x8_t delta = vqmovun_s16(tmp2);                                \
    pred = vadd_u8(D, delta);                                                 \
    out = vext_u8(out, ROTATE_LEFT_N(pred, (L)), 1);                          \
-} while (0)
+  } while (0)
 static void GradientPredictInverse_NEON(const uint8_t* const in,
                                        const uint8_t* const top,
--- a/src/dsp/filters_sse2.c
+++ b/src/dsp/filters_sse2.c
@@ -62,9 +62,9 @@ static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
  const int max_pos = length & ~31;
  assert(length >= 0);
  for (i = 0; i < max_pos; i += 32) {
-    const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i +  0    ));
+    const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
    const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i + 0 - 1));
-    const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16    ));
+    const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
    const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
    const __m128i C0 = _mm_sub_epi8(A0, B0);
    const __m128i C1 = _mm_sub_epi8(A1, B1);
@@ -77,8 +77,9 @@ static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
 //------------------------------------------------------------------------------
 // Horizontal filter.
-static WEBP_INLINE void DoHorizontalFilter_SSE2(
+static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
-    const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
+                                                int width, int height,
                                                int stride,
                                                uint8_t* WEBP_RESTRICT out) {
  int row;
  DCHECK(in, out);
@@ -183,20 +184,20 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
-static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
+static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data, int width,
-                                  int width, int height, int stride,
+                                  int height, int stride,
                                  uint8_t* WEBP_RESTRICT filtered_data) {
  DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
 }
-static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
+static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data, int width,
-                                int width, int height, int stride,
+                                int height, int stride,
                                uint8_t* WEBP_RESTRICT filtered_data) {
  DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
 }
-static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
+static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data, int width,
-                                int width, int height, int stride,
+                                int height, int stride,
                                uint8_t* WEBP_RESTRICT filtered_data) {
  DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
 }
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -40,8 +40,8 @@ static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
  return Average2(Average2(a0, a2), a1);
 }
-static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, uint32_t a2,
-                                     uint32_t a2, uint32_t a3) {
+                                     uint32_t a3) {
  return Average2(Average2(a0, a1), Average2(a2, a3));
 }
@@ -61,11 +61,9 @@ static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
-  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
+  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, (c1 >> 16) & 0xff,
                                         (c1 >> 16) & 0xff,
                                         (c2 >> 16) & 0xff);
-  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
+  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, (c1 >> 8) & 0xff,
                                         (c1 >> 8) & 0xff,
                                         (c2 >> 8) & 0xff);
  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
@@ -88,9 +86,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
 // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
 // inlined.
 #if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
-# define LOCAL_INLINE __attribute__ ((noinline))
+#define LOCAL_INLINE __attribute__((noinline))
 #else
-# define LOCAL_INLINE WEBP_INLINE
+#define LOCAL_INLINE WEBP_INLINE
 #endif
 static LOCAL_INLINE int Sub3(int a, int b, int c) {
@@ -103,10 +101,10 @@ static LOCAL_INLINE int Sub3(int a, int b, int c) {
 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  const int pa_minus_pb =
-      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
+      Sub3((a >> 24), (b >> 24), (c >> 24)) +
      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
      Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
-      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
+      Sub3((a) & 0xff, (b) & 0xff, (c) & 0xff);
  return (pa_minus_pb <= 0) ? a : b;
 }
@@ -277,8 +275,7 @@ void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
  }
 }
-static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
+static WEBP_INLINE int ColorTransformDelta(int8_t color_pred, int8_t color) {
                                           int8_t color) {
  return ((int)color_pred * color) >> 5;
 }
@@ -324,7 +321,7 @@ static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
  while (y < y_end) {
    const uint32_t* pred = pred_row;
-    VP8LMultipliers m = { 0, 0, 0 };
+    VP8LMultipliers m = {0, 0, 0};
    const uint32_t* const src_safe_end = src + safe_width;
    const uint32_t* const src_end = src + width;
    while (src < src_safe_end) {
@@ -346,6 +343,7 @@ static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
 // Separate out pixels packed together using pixel-bundling.
 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
 // clang-format off
 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
                            GET_INDEX, GET_VALUE)                              \
 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
@@ -386,17 +384,18 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
    VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
  }                                                                            \
 }
 // clang-format on
-COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
+COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static, uint32_t,
-                    uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
+                    32b, VP8GetARGBIndex, VP8GetARGBValue)
-COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, , uint8_t,
-                    uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
+                    8b, VP8GetAlphaIndex, VP8GetAlphaValue)
 #undef COLOR_INDEX_INVERSE
-void VP8LInverseTransform(const VP8LTransform* const transform,
+void VP8LInverseTransform(const VP8LTransform* const transform, int row_start,
-                          int row_start, int row_end,
+                          int row_end, const uint32_t* const in,
-                          const uint32_t* const in, uint32_t* const out) {
+                          uint32_t* const out) {
  const int width = transform->xsize;
  assert(row_start < row_end);
  assert(row_end <= transform->ysize);
@@ -424,7 +423,8 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
        // the effective width of VP8LSubSampleSize(xsize, bits). All other
        // transforms work on effective width of 'xsize'.
        const int out_stride = (row_end - row_start) * width;
-        const int in_stride = (row_end - row_start) *
+        const int in_stride =
            (row_end - row_start) *
            VP8LSubSampleSize(transform->xsize, transform->bits);
        uint32_t* const src = out + out_stride - in_stride;
        memmove(src, out, in_stride * sizeof(*src));
@@ -443,12 +443,12 @@ static int is_big_endian(void) {
  static const union {
    uint16_t w;
    uint8_t b[2];
-  } tmp = { 1 };
+  } tmp = {1};
  return (tmp.b[0] != 1);
 }
-void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
+void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
-                            int num_pixels, uint8_t* WEBP_RESTRICT dst) {
+                            uint8_t* WEBP_RESTRICT dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
@@ -458,8 +458,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
  }
 }
-void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
+void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
-                             int num_pixels, uint8_t* WEBP_RESTRICT dst) {
+                             uint8_t* WEBP_RESTRICT dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
@@ -504,8 +504,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
  }
 }
-void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
+void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
-                            int num_pixels, uint8_t* WEBP_RESTRICT dst) {
+                            uint8_t* WEBP_RESTRICT dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
@@ -607,7 +607,8 @@ extern void VP8LDspInitNEON(void);
 extern void VP8LDspInitMIPSdspR2(void);
 extern void VP8LDspInitMSA(void);
-#define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
+#define COPY_PREDICTOR_ARRAY(IN, OUT)                       \
  do {                                                      \
    (OUT)[0] = IN##0_C;                                     \
    (OUT)[1] = IN##1_C;                                     \
    (OUT)[2] = IN##2_C;                                     \
@@ -624,7 +625,7 @@ extern void VP8LDspInitMSA(void);
    (OUT)[13] = IN##13_C;                                   \
    (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
    (OUT)[15] = IN##0_C;                                    \
-} while (0);
+  } while (0);
 WEBP_DSP_INIT_FUNC(VP8LDspInit) {
  COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors)
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -16,8 +16,8 @@
 #define WEBP_DSP_LOSSLESS_H_
 #include "src/dsp/dsp.h"
 #include "src/webp/types.h"
 #include "src/webp/decode.h"
 #include "src/webp/types.h"
 #ifdef __cplusplus
 extern "C" {
@@ -88,8 +88,8 @@ struct VP8LTransform;  // Defined in dec/vp8li.h.
 // The *in and *out pointers refer to source and destination data respectively
 // corresponding to the intermediate row (row_start).
 void VP8LInverseTransform(const struct VP8LTransform* const transform,
-                          int row_start, int row_end,
+                          int row_start, int row_end, const uint32_t* const in,
-                          const uint32_t* const in, uint32_t* const out);
+                          uint32_t* const out);
 // Color space conversion.
 typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
@@ -107,13 +107,11 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
 typedef void (*VP8LMapARGBFunc)(const uint32_t* src,
-                                const uint32_t* const color_map,
+                                const uint32_t* const color_map, uint32_t* dst,
-                                uint32_t* dst, int y_start,
+                                int y_start, int y_end, int width);
                                int y_end, int width);
 typedef void (*VP8LMapAlphaFunc)(const uint8_t* src,
-                                 const uint32_t* const color_map,
+                                 const uint32_t* const color_map, uint8_t* dst,
-                                 uint8_t* dst, int y_start,
+                                 int y_start, int y_end, int width);
                                 int y_end, int width);
 extern VP8LMapARGBFunc VP8LMapColor32b;
 extern VP8LMapAlphaFunc VP8LMapColor8b;
@@ -158,16 +156,14 @@ typedef void (*VP8LTransformColorFunc)(
 extern VP8LTransformColorFunc VP8LTransformColor;
 extern VP8LTransformColorFunc VP8LTransformColor_SSE;
 typedef void (*VP8LCollectColorBlueTransformsFunc)(
-    const uint32_t* WEBP_RESTRICT argb, int stride,
+    const uint32_t* WEBP_RESTRICT argb, int stride, int tile_width,
-    int tile_width, int tile_height,
+    int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]);
    int green_to_blue, int red_to_blue, uint32_t histo[]);
 extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
 extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
 typedef void (*VP8LCollectColorRedTransformsFunc)(
-    const uint32_t* WEBP_RESTRICT argb, int stride,
+    const uint32_t* WEBP_RESTRICT argb, int stride, int tile_width,
-    int tile_width, int tile_height,
+    int tile_height, int green_to_red, uint32_t histo[]);
    int green_to_red, uint32_t histo[]);
 extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
 extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
@@ -176,14 +172,13 @@ void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
                          uint32_t* WEBP_RESTRICT data, int num_pixels);
 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
 void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
-                                     int stride,
+                                     int stride, int tile_width,
-                                     int tile_width, int tile_height,
+                                     int tile_height, int green_to_red,
                                     int green_to_red, uint32_t histo[]);
 void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
                                      int stride,
                                      int tile_width, int tile_height,
                                      int green_to_blue, int red_to_blue,
                                     uint32_t histo[]);
 void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
                                      int stride, int tile_width,
                                      int tile_height, int green_to_blue,
                                      int red_to_blue, uint32_t histo[]);
 extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
 extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
@@ -256,8 +251,8 @@ typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
                                       uint32_t* WEBP_RESTRICT dst);
 extern VP8LBundleColorMapFunc VP8LBundleColorMap;
 extern VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
-void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
+void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row, int width,
-                          int width, int xbits, uint32_t* WEBP_RESTRICT dst);
+                          int xbits, uint32_t* WEBP_RESTRICT dst);
 // Must be called before calling any of the above methods.
 void VP8LEncDspInit(void);
--- a/src/dsp/lossless_avx2.c
+++ b/src/dsp/lossless_avx2.c
@@ -15,8 +15,8 @@
 #if defined(WEBP_USE_AVX2)
 #include <stddef.h>
 #include <immintrin.h>
 #include <stddef.h>
 #include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
@@ -362,9 +362,8 @@ static void TransformColorInverse_AVX2(const VP8LMultipliers* const m,
                                       int num_pixels, uint32_t* dst) {
 // sign-extended multiplying constants, pre-shifted by 5.
 #define CST(X) (((int16_t)(m->X << 8)) >> 5)  // sign-extend
-  const __m256i mults_rb =
+  const __m256i mults_rb = _mm256_set1_epi32(
-      _mm256_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
+      (int)((uint32_t)CST(green_to_red) << 16 | (CST(green_to_blue) & 0xffff)));
                              (CST(green_to_blue) & 0xffff)));
  const __m256i mults_b2 = _mm256_set1_epi32(CST(red_to_blue));
 #undef CST
  const __m256i mask_ag = _mm256_set1_epi32((int)0xff00ff00);
--- a/src/dsp/lossless_common.h
+++ b/src/dsp/lossless_common.h
@@ -35,13 +35,9 @@ static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) {
  return (idx >> 8) & 0xff;
 }
-static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) {
+static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) { return idx; }
  return idx;
 }
-static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) {
+static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) { return val; }
  return val;
 }
 static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) {
  return (val >> 8) & 0xff;
@@ -172,16 +168,16 @@ static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
 }
 // Sum of each component, mod 256.
-static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
+static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE uint32_t
-uint32_t VP8LAddPixels(uint32_t a, uint32_t b) {
+VP8LAddPixels(uint32_t a, uint32_t b) {
  const uint32_t alpha_and_green = (a & 0xff00ff00u) + (b & 0xff00ff00u);
  const uint32_t red_and_blue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
  return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
 }
 // Difference of each component, mod 256.
-static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
+static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE uint32_t
-uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
+VP8LSubPixels(uint32_t a, uint32_t b) {
  const uint32_t alpha_and_green =
      0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
  const uint32_t red_and_blue =
@@ -198,7 +194,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
 // The predictor is added to the output pixel (which
 // is therefore considered as a residual) to get the final prediction.
 #define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD)                   \
-static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper,     \
+  static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper,     \
                            int num_pixels, uint32_t* WEBP_RESTRICT out) { \
    int x;                                                                 \
    assert(upper != NULL);                                                 \
@@ -206,7 +202,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper,     \
      const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);           \
      out[x] = VP8LAddPixels(in[x], pred);                                 \
    }                                                                      \
-}
+  }
 #ifdef __cplusplus
 }  // extern "C"
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -69,8 +69,7 @@ const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
    65865245, 65917522, 65969575, 66021404, 66073013, 66124403, 66175575,
    66226531, 66277275, 66327806, 66378127, 66428240, 66478146, 66527847,
    66577345, 66626641, 66675737, 66724635, 66773336, 66821842, 66870154,
-  66918274, 66966204, 67013944, 67061497
+    66918274, 66966204, 67013944, 67061497};
 };
 // lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS).
 // Obtained in Python with:
@@ -80,18 +79,18 @@ const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
 // print(',\n '.join([','.join(v) for v in batched([i.rjust(15)
 //                      for i in a],4)]))
 const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
-               0,              0,       16777216,       39886887,
+    0ull,           0ull,           16777216ull,    39886887ull,
-        67108864,       97388723,      130105423,      164848600,
+    67108864ull,    97388723ull,    130105423ull,   164848600ull,
-       201326592,      239321324,      278663526,      319217973,
+    201326592ull,   239321324ull,   278663526ull,   319217973ull,
-       360874141,      403539997,      447137711,      491600606,
+    360874141ull,   403539997ull,   447137711ull,   491600606ull,
-       536870912,      582898099,      629637592,      677049776,
+    536870912ull,   582898099ull,   629637592ull,   677049776ull,
-       725099212,      773754010,      822985323,      872766924,
+    725099212ull,   773754010ull,   822985323ull,   872766924ull,
-       923074875,      973887230,     1025183802,     1076945958,
+    923074875ull,   973887230ull,   1025183802ull,  1076945958ull,
-      1129156447,     1181799249,     1234859451,     1288323135,
+    1129156447ull,  1181799249ull,  1234859451ull,  1288323135ull,
-      1342177280,     1396409681,     1451008871,     1505964059,
+    1342177280ull,  1396409681ull,  1451008871ull,  1505964059ull,
-      1561265072,     1616902301,     1672866655,     1729149526,
+    1561265072ull,  1616902301ull,  1672866655ull,  1729149526ull,
-      1785742744,     1842638548,     1899829557,     1957308741,
+    1785742744ull,  1842638548ull,  1899829557ull,  1957308741ull,
-      2015069397,     2073105127,     2131409817,  2189977618ull,
+    2015069397ull,  2073105127ull,  2131409817ull,  2189977618ull,
    2248802933ull,  2307880396ull,  2367204859ull,  2426771383ull,
    2486575220ull,  2546611805ull,  2606876748ull,  2667365819ull,
    2728074942ull,  2789000187ull,  2850137762ull,  2911484006ull,
@@ -143,15 +142,14 @@ const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
    15918673369ull, 15997128556ull, 16075633960ull, 16154189373ull,
    16232794589ull, 16311449405ull, 16390153617ull, 16468907026ull,
    16547709431ull, 16626560636ull, 16705460444ull, 16784408661ull,
-  16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull
+    16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull};
 };
 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
-  { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
+    {0, 0},  {0, 0},  {1, 0},  {2, 0},  {3, 0},  {4, 1},  {4, 1},  {5, 1},
-  { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
+    {5, 1},  {6, 2},  {6, 2},  {6, 2},  {6, 2},  {7, 2},  {7, 2},  {7, 2},
-  { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
+    {7, 2},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},
-  { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
+    {8, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},
-  { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+    {9, 3},  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
    {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
@@ -214,40 +212,41 @@ const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
 };
 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
-   0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
+    0,   0,   0,   0,   0,   0,   1,   0,   1,   0,   1,   2,   3,   0,   1,
-   0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
+    2,   3,   0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   4,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    5,   6,   7,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    12,  13,  14,  15,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    11,  12,  13,  14,  15,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    25,  26,  27,  28,  29,  30,  31,  0,   1,   2,   3,   4,   5,   6,   7,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    23,  24,  25,  26,  27,  28,  29,  30,  31,  0,   1,   2,   3,   4,   5,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
-  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  0,   1,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
-  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    62,  63,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
-  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
-  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+    43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,
-  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+    58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,
-  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+    73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
-  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+    88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102,
-  127,
+    103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0,   1,   2,   3,   4,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,
-  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,
-  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+    50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
-  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+    65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
-  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+    80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,
-  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
+    95,  96,  97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
-};
+    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
    125, 126};
 static uint64_t FastSLog2Slow_C(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
@@ -469,9 +468,7 @@ static WEBP_INLINE int ColorTransformDelta(int8_t color_pred, int8_t color) {
  return ((int)color_pred * color) >> 5;
 }
-static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
+static WEBP_INLINE int8_t U32ToS8(uint32_t v) { return (int8_t)(v & 0xff); }
  return (int8_t)(v & 0xff);
 }
 void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
                          uint32_t* WEBP_RESTRICT data, int num_pixels) {
@@ -511,9 +508,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
 }
 void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
-                                     int stride,
+                                     int stride, int tile_width,
-                                     int tile_width, int tile_height,
+                                     int tile_height, int green_to_red,
-                                     int green_to_red, uint32_t histo[]) {
+                                     uint32_t histo[]) {
  while (tile_height-- > 0) {
    int x;
    for (x = 0; x < tile_width; ++x) {
@@ -524,10 +521,9 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
 }
 void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
-                                      int stride,
+                                      int stride, int tile_width,
-                                      int tile_width, int tile_height,
+                                      int tile_height, int green_to_blue,
-                                      int green_to_blue, int red_to_blue,
+                                      int red_to_blue, uint32_t histo[]) {
                                      uint32_t histo[]) {
  while (tile_height-- > 0) {
    int x;
    for (x = 0; x < tile_width; ++x) {
@@ -551,8 +547,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
 }
 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
-void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
+void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row, int width,
-                          int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
+                          int xbits, uint32_t* WEBP_RESTRICT dst) {
  int x;
  if (xbits > 0) {
    const int bit_depth = 1 << (3 - xbits);
@@ -618,9 +614,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
 // It subtracts the prediction from the input pixel and stores the residual
 // in the output pixel.
 #define GENERATE_PREDICTOR_SUB(PREDICTOR_I)                      \
-static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in,              \
+  static void PredictorSub##PREDICTOR_I##_C(                     \
-                                          const uint32_t* upper,           \
+      const uint32_t* in, const uint32_t* upper, int num_pixels, \
                                          int num_pixels,                  \
      uint32_t* WEBP_RESTRICT out) {                             \
    int x;                                                       \
    assert(upper != NULL);                                       \
@@ -629,7 +624,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in,              \
          VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \
      out[x] = VP8LSubPixels(in[x], pred);                       \
    }                                                            \
-}
+  }
 GENERATE_PREDICTOR_SUB(2)
 GENERATE_PREDICTOR_SUB(3)
--- a/src/dsp/lossless_enc_avx2.c
+++ b/src/dsp/lossless_enc_avx2.c
@@ -14,10 +14,9 @@
 #include "src/dsp/dsp.h"
 #if defined(WEBP_USE_AVX2)
 #include <assert.h>
 #include <emmintrin.h>
 #include <immintrin.h>
 #include <assert.h>
 #include <stddef.h>
 #include "src/dsp/cpu.h"
--- a/Show More
+++ b/Show More