apply clang-format

(Debian clang-format version 19.1.7 (3+build4)) with `--style=Google`.

Manual changes:
* clang-format disabled around macros with stringification (mostly assembly)
* some inline assembly strings were adjusted to avoid awkward line breaks
* trailing commas, `//` or suffixes (`ull`) added to help array formatting
* thread_utils.c: parameter comments were changed to the more common
  /*...=*/ style to improve formatting

The automatically generated code under swig/ was skipped.

Bug: 433996651
Change-Id: Iea3f24160d78d2a2653971cdf13fa932e47ff1b3
2026-02-11 12:36:37 +01:00 · 2025-07-28 18:23:12 -07:00
parent b569988d3f
commit 44257cb826
224 changed files with 16312 additions and 16734 deletions
--- a/examples/anim_diff.c
+++ b/examples/anim_diff.c
@@ -57,8 +57,8 @@ static WEBP_INLINE int PixelsAreSimilar(uint32_t src, uint32_t dst,
 }

 static int FramesAreSimilar(const uint8_t* const rgba1,
-                            const uint8_t* const rgba2,
-                            int width, int height, int max_allowed_diff) {
+                            const uint8_t* const rgba2, int width, int height,
+                            int max_allowed_diff) {
  int i, j;
  assert(max_allowed_diff > 0);
  for (j = 0; j < height; ++j) {
@@ -120,8 +120,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
    if (alpha1 == 0 && alpha2 == 0) return 1;
  }
  if (bg1 != bg2) {
-    fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n",
-            bg1, bg2);
+    fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n", bg1, bg2);
    return 0;
  }
  return 1;
@@ -131,8 +130,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
 // is OK for other aspects like offsets, dispose/blend method to vary.
 static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                                    const AnimatedImage* const img2,
-                                    int premultiply,
-                                    double min_psnr) {
+                                    int premultiply, double min_psnr) {
  int ok = 1;
  const int is_multi_frame_image = (img1->num_frames > 1);
  uint32_t i;
@@ -141,8 +139,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                      "Canvas width mismatch");
  ok &= CompareValues(img1->canvas_height, img2->canvas_height,
                      "Canvas height mismatch");
-  ok &= CompareValues(img1->num_frames, img2->num_frames,
-                      "Frame count mismatch");
+  ok &=
+      CompareValues(img1->num_frames, img2->num_frames, "Frame count mismatch");
  if (!ok) return 0;  // These are fatal failures, can't proceed.

  if (is_multi_frame_image) {  // Checks relevant for multi-frame images only.
@@ -178,8 +176,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
                   premultiply, &max_diff, &psnr);
    if (min_psnr > 0.) {
      if (psnr < min_psnr) {
-        fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i,
-                psnr, min_psnr);
+        fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i, psnr,
+                min_psnr);
        ok = 0;
      }
    } else {
@@ -199,9 +197,10 @@ static void Help(void) {
  printf("  -min_psnr <float> ... minimum per-frame PSNR\n");
  printf("  -raw_comparison ..... if this flag is not used, RGB is\n");
  printf("                        premultiplied before comparison\n");
-  printf("  -max_diff <int> ..... maximum allowed difference per channel\n"
-         "                        between corresponding pixels in subsequent\n"
-         "                        frames\n");
+  printf(
+      "  -max_diff <int> ..... maximum allowed difference per channel\n"
+      "                        between corresponding pixels in subsequent\n"
+      "                        frames\n");
  printf("  -h .................. this help\n");
  printf("  -version ............ print version number and exit\n");
 }
@@ -217,7 +216,7 @@ int main(int argc, const char* argv[]) {
  int premultiply = 1;
  int max_diff = 0;
  int i, c;
-  const char* files[2] = { NULL, NULL };
+  const char* files[2] = {NULL, NULL};
  AnimatedImage images[2];

  INIT_WARGV(argc, argv);
@@ -253,9 +252,8 @@ int main(int argc, const char* argv[]) {
      GetAnimatedImageVersions(&dec_version, &demux_version);
      printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
             (dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
-             (dec_version >> 0) & 0xff,
-             (demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
-             (demux_version >> 0) & 0xff);
+             (dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
+             (demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
      FREE_WARGV_AND_RETURN(0);
    } else {
      if (!got_input1) {
@@ -278,7 +276,6 @@ int main(int argc, const char* argv[]) {
    FREE_WARGV_AND_RETURN(return_code);
  }

-
  if (!got_input2) {
    Help();
    FREE_WARGV_AND_RETURN(return_code);
@@ -301,8 +298,8 @@ int main(int argc, const char* argv[]) {
    }
  }

-  if (!CompareAnimatedImagePair(&images[0], &images[1],
-                                premultiply, min_psnr)) {
+  if (!CompareAnimatedImagePair(&images[0], &images[1], premultiply,
+                                min_psnr)) {
    WFPRINTF(stderr, "\nFiles %s and %s differ.\n", (const W_CHAR*)files[0],
             (const W_CHAR*)files[1]);
    return_code = 1;
@@ -311,7 +308,7 @@ int main(int argc, const char* argv[]) {
            (const W_CHAR*)files[1]);
    return_code = 0;
  }
- End:
+End:
  ClearAnimatedImage(&images[0]);
  ClearAnimatedImage(&images[1]);
  FREE_WARGV_AND_RETURN(return_code);
--- a/examples/anim_dump.c
+++ b/examples/anim_dump.c
@@ -29,8 +29,9 @@ static void Help(void) {
  printf("Usage: anim_dump [options] files...\n");
  printf("\nOptions:\n");
  printf("  -folder <string> .... dump folder (default: '.')\n");
-  printf("  -prefix <string> .... prefix for dumped frames "
-                                  "(default: 'dump_')\n");
+  printf(
+      "  -prefix <string> .... prefix for dumped frames "
+      "(default: 'dump_')\n");
  printf("  -tiff ............... save frames as TIFF\n");
  printf("  -pam ................ save frames as PAM\n");
  printf("  -h .................. this help\n");
@@ -82,17 +83,16 @@ int main(int argc, const char* argv[]) {
      GetAnimatedImageVersions(&dec_version, &demux_version);
      printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
             (dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
-             (dec_version >> 0) & 0xff,
-             (demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
-             (demux_version >> 0) & 0xff);
+             (dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
+             (demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {
      uint32_t i;
      AnimatedImage image;
      const W_CHAR* const file = GET_WARGV(argv, c);
      memset(&image, 0, sizeof(image));
-      WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n",
-              file, dump_folder, prefix, suffix);
+      WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n", file, dump_folder, prefix,
+              suffix);
      if (!ReadAnimatedImage((const char*)file, &image, 0, NULL)) {
        WFPRINTF(stderr, "Error decoding file: %s\n Aborting.\n", file);
        error = 1;
@@ -113,8 +113,8 @@ int main(int argc, const char* argv[]) {
        buffer.u.RGBA.rgba = image.frames[i].rgba;
        buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
        buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
-        WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s",
-                  dump_folder, prefix, i, suffix);
+        WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s", dump_folder,
+                  prefix, i, suffix);
        if (!WebPSaveImage(&buffer, format, (const char*)out_file)) {
          WFPRINTF(stderr, "Error while saving image '%s'\n", out_file);
          error = 1;
--- a/examples/anim_util.c
+++ b/examples/anim_util.c
@@ -41,11 +41,11 @@ static const int kNumChannels = 4;

 #if defined(WEBP_HAVE_GIF)
 // Returns true if the frame covers the full canvas.
-static int IsFullFrame(int width, int height,
-                       int canvas_width, int canvas_height) {
+static int IsFullFrame(int width, int height, int canvas_width,
+                       int canvas_height) {
  return (width == canvas_width && height == canvas_height);
 }
-#endif // WEBP_HAVE_GIF
+#endif  // WEBP_HAVE_GIF

 static int CheckSizeForOverflow(uint64_t size) {
  return (size == (size_t)size);
@@ -95,8 +95,8 @@ void ClearAnimatedImage(AnimatedImage* const image) {

 #if defined(WEBP_HAVE_GIF)
 // Clear the canvas to transparent.
-static void ZeroFillCanvas(uint8_t* rgba,
-                           uint32_t canvas_width, uint32_t canvas_height) {
+static void ZeroFillCanvas(uint8_t* rgba, uint32_t canvas_width,
+                           uint32_t canvas_height) {
  memset(rgba, 0, canvas_width * kNumChannels * canvas_height);
 }

@@ -113,16 +113,16 @@ static void ZeroFillFrameRect(uint8_t* rgba, int rgba_stride, int x_offset,
 }

 // Copy width * height pixels from 'src' to 'dst'.
-static void CopyCanvas(const uint8_t* src, uint8_t* dst,
-                       uint32_t width, uint32_t height) {
+static void CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width,
+                       uint32_t height) {
  assert(src != NULL && dst != NULL);
  memcpy(dst, src, width * kNumChannels * height);
 }

 // Copy pixels in the given rectangle from 'src' to 'dst' honoring the 'stride'.
 static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
-                               int x_offset, int y_offset,
-                               int width, int height) {
+                               int x_offset, int y_offset, int width,
+                               int height) {
  int j;
  const int width_in_bytes = width * kNumChannels;
  const size_t offset = y_offset * stride + x_offset * kNumChannels;
@@ -135,11 +135,11 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
    dst += stride;
  }
 }
-#endif // WEBP_HAVE_GIF
+#endif  // WEBP_HAVE_GIF

 // Canonicalize all transparent pixels to transparent black to aid comparison.
-static void CleanupTransparentPixels(uint32_t* rgba,
-                                     uint32_t width, uint32_t height) {
+static void CleanupTransparentPixels(uint32_t* rgba, uint32_t width,
+                                     uint32_t height) {
  const uint32_t* const rgba_end = rgba + width * height;
  while (rgba < rgba_end) {
    const uint8_t alpha = (*rgba >> 24) & 0xff;
@@ -152,8 +152,8 @@ static void CleanupTransparentPixels(uint32_t* rgba,

 // Dump frame to a PAM file. Returns true on success.
 static int DumpFrame(const char filename[], const char dump_folder[],
-                     uint32_t frame_num, const uint8_t rgba[],
-                     int canvas_width, int canvas_height) {
+                     uint32_t frame_num, const uint8_t rgba[], int canvas_width,
+                     int canvas_height) {
  int ok = 0;
  size_t max_len;
  int y;
@@ -166,8 +166,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],

  base_name = WSTRRCHR(filename, '/');
  base_name = (base_name == NULL) ? (const W_CHAR*)filename : base_name + 1;
-  max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name)
-          + strlen("_frame_") + strlen(".pam") + 8;
+  max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name) + strlen("_frame_") +
+            strlen(".pam") + 8;
  file_name = (W_CHAR*)WebPMalloc(max_len * sizeof(*file_name));
  if (file_name == NULL) goto End;

@@ -183,7 +183,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
    ok = 0;
    goto End;
  }
-  if (fprintf(f, "P7\nWIDTH %d\nHEIGHT %d\n"
+  if (fprintf(f,
+              "P7\nWIDTH %d\nHEIGHT %d\n"
              "DEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
              canvas_width, canvas_height) < 0) {
    WFPRINTF(stderr, "Write error for file %s\n", file_name);
@@ -198,7 +199,7 @@ static int DumpFrame(const char filename[], const char dump_folder[],
    row += canvas_width * kNumChannels;
  }
  ok = 1;
- End:
+End:
  if (f != NULL) fclose(f);
  WebPFree(file_name);
  return ok;
@@ -266,8 +267,8 @@ static int ReadAnimatedWebP(const char filename[],
           image->canvas_width * kNumChannels * image->canvas_height);

    // Needed only because we may want to compare with GIF later.
-    CleanupTransparentPixels((uint32_t*)curr_rgba,
-                             image->canvas_width, image->canvas_height);
+    CleanupTransparentPixels((uint32_t*)curr_rgba, image->canvas_width,
+                             image->canvas_height);

    if (dump_frames && dump_ok) {
      dump_ok = DumpFrame(filename, dump_folder, frame_index, curr_rgba,
@@ -283,7 +284,7 @@ static int ReadAnimatedWebP(const char filename[],
  ok = dump_ok;
  if (ok) image->format = ANIM_WEBP;

- End:
+End:
  WebPAnimDecoderDelete(dec);
  return ok;
 }
@@ -303,12 +304,11 @@ static int IsGIF(const WebPData* const data) {

 // GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
 #if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
-# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
-# define LOCAL_GIF_PREREQ(maj, min) \
-    (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
 #else
-# define LOCAL_GIF_VERSION 0
-# define LOCAL_GIF_PREREQ(maj, min) 0
+#define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_PREREQ(maj, min) 0
 #endif

 #if !LOCAL_GIF_PREREQ(5, 0)
@@ -316,14 +316,14 @@ static int IsGIF(const WebPData* const data) {
 // Added in v5.0
 typedef struct {
  int DisposalMode;
-#define DISPOSAL_UNSPECIFIED      0       // No disposal specified
-#define DISPOSE_DO_NOT            1       // Leave image in place
-#define DISPOSE_BACKGROUND        2       // Set area to background color
-#define DISPOSE_PREVIOUS          3       // Restore to previous content
-  int UserInputFlag;       // User confirmation required before disposal
-  int DelayTime;           // Pre-display delay in 0.01sec units
-  int TransparentColor;    // Palette index for transparency, -1 if none
-#define NO_TRANSPARENT_COLOR     -1
+#define DISPOSAL_UNSPECIFIED 0  // No disposal specified
+#define DISPOSE_DO_NOT 1        // Leave image in place
+#define DISPOSE_BACKGROUND 2    // Set area to background color
+#define DISPOSE_PREVIOUS 3      // Restore to previous content
+  int UserInputFlag;            // User confirmation required before disposal
+  int DelayTime;                // Pre-display delay in 0.01sec units
+  int TransparentColor;         // Palette index for transparency, -1 if none
+#define NO_TRANSPARENT_COLOR -1
 } GraphicsControlBlock;

 static int DGifExtensionToGCB(const size_t GifExtensionLength,
@@ -357,8 +357,8 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
  for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
    ExtensionBlock* ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
    if (ep->Function == GRAPHICS_EXT_FUNC_CODE) {
-      return DGifExtensionToGCB(
-          ep->ByteCount, (const GifByteType*)ep->Bytes, gcb);
+      return DGifExtensionToGCB(ep->ByteCount, (const GifByteType*)ep->Bytes,
+                                gcb);
    }
  }
  return GIF_ERROR;
@@ -377,12 +377,12 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
 #endif

 static int IsKeyFrameGIF(const GifImageDesc* prev_desc, int prev_dispose,
-                         const DecodedFrame* const prev_frame,
-                         int canvas_width, int canvas_height) {
+                         const DecodedFrame* const prev_frame, int canvas_width,
+                         int canvas_height) {
  if (prev_frame == NULL) return 1;
  if (prev_dispose == DISPOSE_BACKGROUND) {
-    if (IsFullFrame(prev_desc->Width, prev_desc->Height,
-                    canvas_width, canvas_height)) {
+    if (IsFullFrame(prev_desc->Width, prev_desc->Height, canvas_width,
+                    canvas_height)) {
      return 1;
    }
    if (prev_frame->is_key_frame) return 1;
@@ -403,14 +403,12 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
  if (transparent_index != NO_TRANSPARENT_COLOR &&
      gif->SBackGroundColor == transparent_index) {
    return 0x00000000;  // Special case: transparent black.
-  } else if (color_map == NULL || color_map->Colors == NULL
-             || gif->SBackGroundColor >= color_map->ColorCount) {
+  } else if (color_map == NULL || color_map->Colors == NULL ||
+             gif->SBackGroundColor >= color_map->ColorCount) {
    return 0xffffffff;  // Invalid: assume white.
  } else {
    const GifColorType color = color_map->Colors[gif->SBackGroundColor];
-    return (0xffu << 24) |
-           (color.Red << 16) |
-           (color.Green << 8) |
+    return (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
           (color.Blue << 0);
  }
 }
@@ -435,11 +433,10 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
          (eb1->ByteCount == 11) &&
          (!memcmp(signature, "NETSCAPE2.0", 11) ||
           !memcmp(signature, "ANIMEXTS1.0", 11));
-      if (signature_is_ok &&
-          eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
-          eb2->Bytes[0] == 1) {
-        const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
-                                    ((uint32_t)(eb2->Bytes[1]) << 0);
+      if (signature_is_ok && eb2->Function == CONTINUE_EXT_FUNC_CODE &&
+          eb2->ByteCount >= 3 && eb2->Bytes[0] == 1) {
+        const uint32_t extra_loop =
+            ((uint32_t)(eb2->Bytes[2]) << 8) + ((uint32_t)(eb2->Bytes[1]) << 0);
        return (extra_loop > 0) ? extra_loop + 1 : 0;
      }
    }
@@ -535,8 +532,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
  image->canvas_height = (uint32_t)gif->SHeight;
  if (image->canvas_width > MAX_CANVAS_SIZE ||
      image->canvas_height > MAX_CANVAS_SIZE) {
-    fprintf(stderr, "Invalid canvas dimension: %d x %d\n",
-            image->canvas_width, image->canvas_height);
+    fprintf(stderr, "Invalid canvas dimension: %d x %d\n", image->canvas_width,
+            image->canvas_height);
    DGifCloseFile(gif, NULL);
    return 0;
  }
@@ -611,11 +608,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
        CopyCanvas(prev_rgba, curr_rgba, canvas_width, canvas_height);

        // Dispose previous frame rectangle.
-        prev_frame_disposed =
-            (prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
-             prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
-        curr_frame_opaque =
-            (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
+        prev_frame_disposed = (prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
+                               prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
+        curr_frame_opaque = (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
        prev_frame_completely_covered =
            curr_frame_opaque &&
            CoversFrameGIF(&curr_gif_image->ImageDesc, prev_desc);
@@ -643,9 +638,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
                uint8_t* const src_frame_rgba =
                    image->frames[src_frame_num].rgba;
                CopyFrameRectangle(src_frame_rgba, curr_rgba,
-                                   canvas_width_in_bytes,
-                                   prev_desc->Left, prev_desc->Top,
-                                   prev_desc->Width, prev_desc->Height);
+                                   canvas_width_in_bytes, prev_desc->Left,
+                                   prev_desc->Top, prev_desc->Width,
+                                   prev_desc->Height);
              } else {
                // Source canvas doesn't exist. So clear previous frame
                // rectangle to background.
@@ -670,8 +665,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
    }

    if (dump_frames) {
-      if (!DumpFrame(filename, dump_folder, i, curr_rgba,
-                     canvas_width, canvas_height)) {
+      if (!DumpFrame(filename, dump_folder, i, curr_rgba, canvas_width,
+                     canvas_height)) {
        DGifCloseFile(gif, NULL);
        return 0;
      }
@@ -695,7 +690,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
  (void)image;
  (void)dump_frames;
  (void)dump_folder;
-  fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
+  fprintf(stderr,
+          "GIF support not compiled. Please install the libgif-dev "
          "package before building.\n");
  return 0;
 }
@@ -718,8 +714,8 @@ int ReadAnimatedImage(const char filename[], AnimatedImage* const image,
  }

  if (IsWebP(&webp_data)) {
-    ok = ReadAnimatedWebP(filename, &webp_data, image, dump_frames,
-                          dump_folder);
+    ok =
+        ReadAnimatedWebP(filename, &webp_data, image, dump_frames, dump_folder);
  } else if (IsGIF(&webp_data)) {
    ok = ReadAnimatedGIF(filename, image, dump_frames, dump_folder);
  } else {
@@ -763,8 +759,7 @@ void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
        // premultiply R/G/B channels with alpha value
        for (k = 0; k < kAlphaChannel; ++k) {
          Accumulate(rgba1[offset + k] * alpha1 / 255.,
-                     rgba2[offset + k] * alpha2 / 255.,
-                     &f_max_diff, &sse);
+                     rgba2[offset + k] * alpha2 / 255., &f_max_diff, &sse);
        }
      }
    }
--- a/examples/anim_util.h
+++ b/examples/anim_util.h
@@ -22,15 +22,12 @@
 extern "C" {
 #endif

-typedef enum {
-  ANIM_GIF,
-  ANIM_WEBP
-} AnimatedFileFormat;
+typedef enum { ANIM_GIF, ANIM_WEBP } AnimatedFileFormat;

 typedef struct {
-  uint8_t* rgba;         // Decoded and reconstructed full frame.
-  int duration;          // Frame duration in milliseconds.
-  int is_key_frame;      // True if this frame is a key-frame.
+  uint8_t* rgba;     // Decoded and reconstructed full frame.
+  int duration;      // Frame duration in milliseconds.
+  int is_key_frame;  // True if this frame is a key-frame.
 } DecodedFrame;

 typedef struct {
@@ -67,7 +64,7 @@ void GetAnimatedImageVersions(int* const decoder_version,
                              int* const demux_version);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_EXAMPLES_ANIM_UTIL_H_
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -37,10 +37,10 @@
 extern "C" {
 #endif

-extern void* VP8GetCPUInfo;   // opaque forward declaration.
+extern void* VP8GetCPUInfo;  // opaque forward declaration.

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif
 #endif  // WEBP_DLL

@@ -66,12 +66,12 @@ static int ReadYUV(const uint8_t* const data, size_t data_size,

  pic->use_argb = 0;
  if (!WebPPictureAlloc(pic)) return 0;
-  ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride,
-                     pic->width, pic->height);
-  ImgIoUtilCopyPlane(data + y_plane_size, uv_width,
-                     pic->u, pic->uv_stride, uv_width, uv_height);
-  ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width,
-                     pic->v, pic->uv_stride, uv_width, uv_height);
+  ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride, pic->width,
+                     pic->height);
+  ImgIoUtilCopyPlane(data + y_plane_size, uv_width, pic->u, pic->uv_stride,
+                     uv_width, uv_height);
+  ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width, pic->v,
+                     pic->uv_stride, uv_width, uv_height);
  return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
 }

@@ -119,7 +119,7 @@ static int ReadPicture(const char* const filename, WebPPicture* const pic,
    // If image size is specified, infer it as YUV format.
    ok = ReadYUV(data, data_size, pic);
  }
- End:
+End:
  if (!ok) {
    WFPRINTF(stderr, "Error! Could not process file %s\n",
             (const W_CHAR*)filename);
@@ -168,8 +168,8 @@ static void PrintValues(const int values[4]) {

 static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
                                  const char* const description) {
-  fprintf(stderr, "Lossless-%s compressed size: %d bytes\n",
-          description, stats->lossless_size);
+  fprintf(stderr, "Lossless-%s compressed size: %d bytes\n", description,
+          stats->lossless_size);
  fprintf(stderr, "  * Header size: %d bytes, image data size: %d\n",
          stats->lossless_hdr_size, stats->lossless_data_size);
  if (stats->lossless_features) {
@@ -209,8 +209,7 @@ static void PrintExtraInfoLossless(const WebPPicture* const pic,
 }

 static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
-                                int full_details,
-                                const char* const file_name) {
+                                int full_details, const char* const file_name) {
  const WebPAuxStats* const stats = pic->stats;
  if (short_output) {
    fprintf(stderr, "%7d %2.2f\n", stats->coded_size, stats->PSNR[3]);
@@ -220,25 +219,25 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
    const int num_skip = stats->block_count[2];
    const int total = num_i4 + num_i16;
    WFPRINTF(stderr, "File:      %s\n", (const W_CHAR*)file_name);
-    fprintf(stderr, "Dimension: %d x %d%s\n",
-            pic->width, pic->height,
+    fprintf(stderr, "Dimension: %d x %d%s\n", pic->width, pic->height,
            stats->alpha_data_size ? " (with alpha)" : "");
-    fprintf(stderr, "Output:    "
+    fprintf(stderr,
+            "Output:    "
            "%d bytes Y-U-V-All-PSNR %2.2f %2.2f %2.2f   %2.2f dB\n"
            "           (%.2f bpp)\n",
-            stats->coded_size,
-            stats->PSNR[0], stats->PSNR[1], stats->PSNR[2], stats->PSNR[3],
-            8.f * stats->coded_size / pic->width / pic->height);
+            stats->coded_size, stats->PSNR[0], stats->PSNR[1], stats->PSNR[2],
+            stats->PSNR[3], 8.f * stats->coded_size / pic->width / pic->height);
    if (total > 0) {
-      int totals[4] = { 0, 0, 0, 0 };
-      fprintf(stderr, "block count:  intra4:     %6d  (%.2f%%)\n"
-                      "              intra16:    %6d  (%.2f%%)\n"
-                      "              skipped:    %6d  (%.2f%%)\n",
-              num_i4, 100.f * num_i4 / total,
-              num_i16, 100.f * num_i16 / total,
+      int totals[4] = {0, 0, 0, 0};
+      fprintf(stderr,
+              "block count:  intra4:     %6d  (%.2f%%)\n"
+              "              intra16:    %6d  (%.2f%%)\n"
+              "              skipped:    %6d  (%.2f%%)\n",
+              num_i4, 100.f * num_i4 / total, num_i16, 100.f * num_i16 / total,
              num_skip, 100.f * num_skip / total);
-      fprintf(stderr, "bytes used:  header:         %6d  (%.1f%%)\n"
-                      "             mode-partition: %6d  (%.1f%%)\n",
+      fprintf(stderr,
+              "bytes used:  header:         %6d  (%.1f%%)\n"
+              "             mode-partition: %6d  (%.1f%%)\n",
              stats->header_bytes[0],
              100.f * stats->header_bytes[0] / stats->coded_size,
              stats->header_bytes[1],
@@ -247,9 +246,10 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
        fprintf(stderr, "             transparency:   %6d (%.1f dB)\n",
                stats->alpha_data_size, stats->PSNR[4]);
      }
-      fprintf(stderr, " Residuals bytes  "
-                      "|segment 1|segment 2|segment 3"
-                      "|segment 4|  total\n");
+      fprintf(stderr,
+              " Residuals bytes  "
+              "|segment 1|segment 2|segment 3"
+              "|segment 4|  total\n");
      if (full_details) {
        fprintf(stderr, "  intra4-coeffs:  ");
        PrintByteCount(stats->residual_bytes[0], stats->coded_size, totals);
@@ -286,11 +286,11 @@ static void PrintMapInfo(const WebPPicture* const pic) {
    for (y = 0; y < mb_h; ++y) {
      for (x = 0; x < mb_w; ++x) {
        const int c = pic->extra_info[x + y * mb_w];
-        if (type == 1) {   // intra4/intra16
+        if (type == 1) {  // intra4/intra16
          fprintf(stderr, "%c", "+."[c]);
-        } else if (type == 2) {    // segments
+        } else if (type == 2) {  // segments
          fprintf(stderr, "%c", ".-*X"[c]);
-        } else if (type == 3) {    // quantizers
+        } else if (type == 3) {  // quantizers
          fprintf(stderr, "%.2d ", c);
        } else if (type == 6 || type == 7) {
          fprintf(stderr, "%3d ", c);
@@ -346,7 +346,7 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
  }
  ok = 1;

- Error:
+Error:
  fclose(f);
  return ok;
 }
@@ -356,9 +356,9 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {

 enum {
  METADATA_EXIF = (1 << 0),
-  METADATA_ICC  = (1 << 1),
-  METADATA_XMP  = (1 << 2),
-  METADATA_ALL  = METADATA_EXIF | METADATA_ICC | METADATA_XMP
+  METADATA_ICC = (1 << 1),
+  METADATA_XMP = (1 << 2),
+  METADATA_ALL = METADATA_EXIF | METADATA_ICC | METADATA_XMP
 };

 static const int kChunkHeaderSize = 8;
@@ -411,9 +411,9 @@ static int WriteMetadataChunk(FILE* const out, const char fourcc[4],

 // Sets 'flag' in 'vp8x_flags' and updates 'metadata_size' with the size of the
 // chunk if there is metadata and 'keep' is true.
-static int UpdateFlagsAndSize(const MetadataPayload* const payload,
-                              int keep, int flag,
-                              uint32_t* vp8x_flags, uint64_t* metadata_size) {
+static int UpdateFlagsAndSize(const MetadataPayload* const payload, int keep,
+                              int flag, uint32_t* vp8x_flags,
+                              uint64_t* metadata_size) {
  if (keep && payload->bytes != NULL && payload->size > 0) {
    *vp8x_flags |= flag;
    *metadata_size += kChunkHeaderSize + payload->size + (payload->size & 1);
@@ -434,23 +434,23 @@ static int WriteWebPWithMetadata(FILE* const out,
                                 int* const metadata_written) {
  const char kVP8XHeader[] = "VP8X\x0a\x00\x00\x00";
  const int kAlphaFlag = 0x10;
-  const int kEXIFFlag  = 0x08;
-  const int kICCPFlag  = 0x20;
-  const int kXMPFlag   = 0x04;
+  const int kEXIFFlag = 0x08;
+  const int kICCPFlag = 0x20;
+  const int kXMPFlag = 0x04;
  const size_t kRiffHeaderSize = 12;
  const size_t kMaxChunkPayload = ~0 - kChunkHeaderSize - 1;
  const size_t kMinSize = kRiffHeaderSize + kChunkHeaderSize;
  uint32_t flags = 0;
  uint64_t metadata_size = 0;
-  const int write_exif = UpdateFlagsAndSize(&metadata->exif,
-                                            !!(keep_metadata & METADATA_EXIF),
-                                            kEXIFFlag, &flags, &metadata_size);
-  const int write_iccp = UpdateFlagsAndSize(&metadata->iccp,
-                                            !!(keep_metadata & METADATA_ICC),
-                                            kICCPFlag, &flags, &metadata_size);
-  const int write_xmp  = UpdateFlagsAndSize(&metadata->xmp,
-                                            !!(keep_metadata & METADATA_XMP),
-                                            kXMPFlag, &flags, &metadata_size);
+  const int write_exif =
+      UpdateFlagsAndSize(&metadata->exif, !!(keep_metadata & METADATA_EXIF),
+                         kEXIFFlag, &flags, &metadata_size);
+  const int write_iccp =
+      UpdateFlagsAndSize(&metadata->iccp, !!(keep_metadata & METADATA_ICC),
+                         kICCPFlag, &flags, &metadata_size);
+  const int write_xmp =
+      UpdateFlagsAndSize(&metadata->xmp, !!(keep_metadata & METADATA_XMP),
+                         kXMPFlag, &flags, &metadata_size);
  uint8_t* webp = memory_writer->mem;
  size_t webp_size = memory_writer->size;

@@ -458,17 +458,18 @@ static int WriteWebPWithMetadata(FILE* const out,

  if (webp_size < kMinSize) return 0;
  if (webp_size - kChunkHeaderSize + metadata_size > kMaxChunkPayload) {
-    fprintf(stderr, "Error! Addition of metadata would exceed "
-                    "container size limit.\n");
+    fprintf(stderr,
+            "Error! Addition of metadata would exceed "
+            "container size limit.\n");
    return 0;
  }

  if (metadata_size > 0) {
    const int kVP8XChunkSize = 18;
    const int has_vp8x = !memcmp(webp + kRiffHeaderSize, "VP8X", kTagSize);
-    const uint32_t riff_size = (uint32_t)(webp_size - kChunkHeaderSize +
-                                          (has_vp8x ? 0 : kVP8XChunkSize) +
-                                          metadata_size);
+    const uint32_t riff_size =
+        (uint32_t)(webp_size - kChunkHeaderSize +
+                   (has_vp8x ? 0 : kVP8XChunkSize) + metadata_size);
    // RIFF
    int ok = (fwrite(webp, kTagSize, 1, out) == 1);
    // RIFF size (file header size is not recorded)
@@ -527,8 +528,7 @@ enum {
  RESIZE_MODE_DEFAULT = RESIZE_MODE_ALWAYS
 };

-static void ApplyResizeMode(const int resize_mode,
-                            const WebPPicture* const pic,
+static void ApplyResizeMode(const int resize_mode, const WebPPicture* const pic,
                            int* const resize_w, int* const resize_h) {
  const int src_w = pic->width;
  const int src_h = pic->height;
@@ -536,8 +536,7 @@ static void ApplyResizeMode(const int resize_mode,
  const int dst_h = *resize_h;

  if (resize_mode == RESIZE_MODE_DOWN_ONLY) {
-    if ((dst_w == 0 && src_h <= dst_h) ||
-        (dst_h == 0 && src_w <= dst_w) ||
+    if ((dst_w == 0 && src_h <= dst_h) || (dst_h == 0 && src_w <= dst_w) ||
        (src_w <= dst_w && src_h <= dst_h)) {
      *resize_w = *resize_h = 0;
    }
@@ -551,8 +550,7 @@ static void ApplyResizeMode(const int resize_mode,
 //------------------------------------------------------------------------------

 static int ProgressReport(int percent, const WebPPicture* const picture) {
-  fprintf(stderr, "[%s]: %3d %%      \r",
-          (char*)picture->user_data, percent);
+  fprintf(stderr, "[%s]: %3d %%      \r", (char*)picture->user_data, percent);
  return 1;  // all ok
 }

@@ -569,8 +567,9 @@ static void HelpShort(void) {
 static void HelpLong(void) {
  printf("Usage:\n");
  printf(" cwebp [-preset <...>] [options] in_file [-o out_file]\n\n");
-  printf("If input size (-s) for an image is not specified, it is\n"
-         "assumed to be a PNG, JPEG, TIFF or WebP file.\n");
+  printf(
+      "If input size (-s) for an image is not specified, it is\n"
+      "assumed to be a PNG, JPEG, TIFF or WebP file.\n");
  printf("Note: Animated PNG and WebP files are not supported.\n");
 #ifdef HAVE_WINCODEC_H
  printf("Windows builds can take as input any of the files handled by WIC.\n");
@@ -578,46 +577,59 @@ static void HelpLong(void) {
  printf("\nOptions:\n");
  printf("  -h / -help ............. short help\n");
  printf("  -H / -longhelp ......... long help\n");
-  printf("  -q <float> ............. quality factor (0:small..100:big), "
-         "default=75\n");
-  printf("  -alpha_q <int> ......... transparency-compression quality (0..100),"
-         "\n                           default=100\n");
+  printf(
+      "  -q <float> ............. quality factor (0:small..100:big), "
+      "default=75\n");
+  printf(
+      "  -alpha_q <int> ......... transparency-compression quality (0..100),"
+      "\n                           default=100\n");
  printf("  -preset <string> ....... preset setting, one of:\n");
  printf("                            default, photo, picture,\n");
  printf("                            drawing, icon, text\n");
  printf("     -preset must come first, as it overwrites other parameters\n");
-  printf("  -z <int> ............... activates lossless preset with given\n"
-         "                           level in [0:fast, ..., 9:slowest]\n");
+  printf(
+      "  -z <int> ............... activates lossless preset with given\n"
+      "                           level in [0:fast, ..., 9:slowest]\n");
  printf("\n");
-  printf("  -m <int> ............... compression method (0=fast, 6=slowest), "
-         "default=4\n");
-  printf("  -segments <int> ........ number of segments to use (1..4), "
-         "default=4\n");
+  printf(
+      "  -m <int> ............... compression method (0=fast, 6=slowest), "
+      "default=4\n");
+  printf(
+      "  -segments <int> ........ number of segments to use (1..4), "
+      "default=4\n");
  printf("  -size <int> ............ target size (in bytes)\n");
  printf("  -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
  printf("\n");
  printf("  -s <int> <int> ......... input size (width x height) for YUV\n");
-  printf("  -sns <int> ............. spatial noise shaping (0:off, 100:max), "
-         "default=50\n");
-  printf("  -f <int> ............... filter strength (0=off..100), "
-         "default=60\n");
-  printf("  -sharpness <int> ....... "
-         "filter sharpness (0:most .. 7:least sharp), default=0\n");
-  printf("  -strong ................ use strong filter instead "
-                                     "of simple (default)\n");
+  printf(
+      "  -sns <int> ............. spatial noise shaping (0:off, 100:max), "
+      "default=50\n");
+  printf(
+      "  -f <int> ............... filter strength (0=off..100), "
+      "default=60\n");
+  printf(
+      "  -sharpness <int> ....... "
+      "filter sharpness (0:most .. 7:least sharp), default=0\n");
+  printf(
+      "  -strong ................ use strong filter instead "
+      "of simple (default)\n");
  printf("  -nostrong .............. use simple filter instead of strong\n");
-  printf("  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
-                                     "conversion\n");
+  printf(
+      "  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
+      "conversion\n");
  printf("  -partition_limit <int> . limit quality to fit the 512k limit on\n");
-  printf("                           "
-         "the first partition (0=no degradation ... 100=full)\n");
+  printf(
+      "                           "
+      "the first partition (0=no degradation ... 100=full)\n");
  printf("  -pass <int> ............ analysis pass number (1..10)\n");
-  printf("  -qrange <min> <max> .... specifies the permissible quality range\n"
-         "                           (default: 0 100)\n");
+  printf(
+      "  -qrange <min> <max> .... specifies the permissible quality range\n"
+      "                           (default: 0 100)\n");
  printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
  printf("  -resize <w> <h> ........ resize picture (*after* any cropping)\n");
-  printf("  -resize_mode <string> .. one of: up_only, down_only,"
-         " always (default)\n");
+  printf(
+      "  -resize_mode <string> .. one of: up_only, down_only,"
+      " always (default)\n");
  printf("  -mt .................... use multi-threading if available\n");
  printf("  -low_memory ............ reduce memory usage (slower encoding)\n");
  printf("  -map <int> ............. print map of extra info\n");
@@ -625,20 +637,24 @@ static void HelpLong(void) {
  printf("  -print_ssim ............ prints averaged SSIM distortion\n");
  printf("  -print_lsim ............ prints local-similarity distortion\n");
  printf("  -d <file.pgm> .......... dump the compressed output (PGM file)\n");
-  printf("  -alpha_method <int> .... transparency-compression method (0..1), "
-         "default=1\n");
+  printf(
+      "  -alpha_method <int> .... transparency-compression method (0..1), "
+      "default=1\n");
  printf("  -alpha_filter <string> . predictive filtering for alpha plane,\n");
  printf("                           one of: none, fast (default) or best\n");
-  printf("  -exact ................. preserve RGB values in transparent area, "
-         "default=off\n");
-  printf("  -blend_alpha <hex> ..... blend colors against background color\n"
-         "                           expressed as RGB values written in\n"
-         "                           hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
-         "                           green=0xe0 and blue=0xd0\n");
+  printf(
+      "  -exact ................. preserve RGB values in transparent area, "
+      "default=off\n");
+  printf(
+      "  -blend_alpha <hex> ..... blend colors against background color\n"
+      "                           expressed as RGB values written in\n"
+      "                           hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
+      "                           green=0xe0 and blue=0xd0\n");
  printf("  -noalpha ............... discard any transparency information\n");
  printf("  -lossless .............. encode image losslessly, default=off\n");
-  printf("  -near_lossless <int> ... use near-lossless image preprocessing\n"
-         "                           (0..100=off), default=100\n");
+  printf(
+      "  -near_lossless <int> ... use near-lossless image preprocessing\n"
+      "                           (0..100=off), default=100\n");
  printf("  -hint <string> ......... specify image characteristics hint,\n");
  printf("                           one of: photo, picture or graph\n");

@@ -646,8 +662,9 @@ static void HelpLong(void) {
  printf("  -metadata <string> ..... comma separated list of metadata to\n");
  printf("                           ");
  printf("copy from the input to the output if present.\n");
-  printf("                           "
-         "Valid values: all, none (default), exif, icc, xmp\n");
+  printf(
+      "                           "
+      "Valid values: all, none (default), exif, icc, xmp\n");

  printf("\n");
  printf("  -short ................. condense printed message\n");
@@ -656,8 +673,9 @@ static void HelpLong(void) {
 #ifndef WEBP_DLL
  printf("  -noasm ................. disable all assembly optimizations\n");
 #endif
-  printf("  -v ..................... verbose, e.g. print encoding/decoding "
-         "times\n");
+  printf(
+      "  -v ..................... verbose, e.g. print encoding/decoding "
+      "times\n");
  printf("  -progress .............. report encoding progress\n");
  printf("\n");
  printf("Experimental Options:\n");
@@ -672,30 +690,29 @@ static void HelpLong(void) {
 // Error messages

 static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {
-  "OK",
-  "OUT_OF_MEMORY: Out of memory allocating objects",
-  "BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
-  "NULL_PARAMETER: NULL parameter passed to function",
-  "INVALID_CONFIGURATION: configuration is invalid",
-  "BAD_DIMENSION: Bad picture dimension. Maximum width and height "
-  "allowed is 16383 pixels.",
-  "PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k.\n"
-  "To reduce the size of this partition, try using less segments "
-  "with the -segments option, and eventually reduce the number of "
-  "header bits using -partition_limit. More details are available "
-  "in the manual (`man cwebp`)",
-  "PARTITION_OVERFLOW: Partition is too big to fit 16M",
-  "BAD_WRITE: Picture writer returned an I/O error",
-  "FILE_TOO_BIG: File would be too big to fit in 4G",
-  "USER_ABORT: encoding abort requested by user"
-};
+    "OK",
+    "OUT_OF_MEMORY: Out of memory allocating objects",
+    "BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
+    "NULL_PARAMETER: NULL parameter passed to function",
+    "INVALID_CONFIGURATION: configuration is invalid",
+    "BAD_DIMENSION: Bad picture dimension. Maximum width and height "
+    "allowed is 16383 pixels.",
+    "PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k.\n"
+    "To reduce the size of this partition, try using less segments "
+    "with the -segments option, and eventually reduce the number of "
+    "header bits using -partition_limit. More details are available "
+    "in the manual (`man cwebp`)",
+    "PARTITION_OVERFLOW: Partition is too big to fit 16M",
+    "BAD_WRITE: Picture writer returned an I/O error",
+    "FILE_TOO_BIG: File would be too big to fit in 4G",
+    "USER_ABORT: encoding abort requested by user"};

 //------------------------------------------------------------------------------

 // Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
  int return_value = EXIT_FAILURE;
-  const char* in_file = NULL, *out_file = NULL, *dump_file = NULL;
+  const char *in_file = NULL, *out_file = NULL, *dump_file = NULL;
  FILE* out = NULL;
  int c;
  int short_output = 0;
@@ -712,8 +729,8 @@ int main(int argc, const char* argv[]) {
  int keep_metadata = 0;
  int metadata_written = 0;
  WebPPicture picture;
-  int print_distortion = -1;        // -1=off, 0=PSNR, 1=SSIM, 2=LSIM
-  WebPPicture original_picture;    // when PSNR or SSIM is requested
+  int print_distortion = -1;     // -1=off, 0=PSNR, 1=SSIM, 2=LSIM
+  WebPPicture original_picture;  // when PSNR or SSIM is requested
  WebPConfig config;
  WebPAuxStats stats;
  WebPMemoryWriter memory_writer;
@@ -725,8 +742,7 @@ int main(int argc, const char* argv[]) {

  MetadataInit(&metadata);
  WebPMemoryWriterInit(&memory_writer);
-  if (!WebPPictureInit(&picture) ||
-      !WebPPictureInit(&original_picture) ||
+  if (!WebPPictureInit(&picture) || !WebPPictureInit(&original_picture) ||
      !WebPConfigInit(&config)) {
    fprintf(stderr, "Error! Version mismatch!\n");
    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
@@ -765,18 +781,17 @@ int main(int argc, const char* argv[]) {
      picture.width = ExUtilGetInt(argv[++c], 0, &parse_error);
      picture.height = ExUtilGetInt(argv[++c], 0, &parse_error);
      if (picture.width > WEBP_MAX_DIMENSION || picture.width < 0 ||
-          picture.height > WEBP_MAX_DIMENSION ||  picture.height < 0) {
-        fprintf(stderr,
-                "Specified dimension (%d x %d) is out of range.\n",
+          picture.height > WEBP_MAX_DIMENSION || picture.height < 0) {
+        fprintf(stderr, "Specified dimension (%d x %d) is out of range.\n",
                picture.width, picture.height);
        goto Error;
      }
    } else if (!strcmp(argv[c], "-m") && c + 1 < argc) {
      config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
-      use_lossless_preset = 0;   // disable -z option
+      use_lossless_preset = 0;  // disable -z option
    } else if (!strcmp(argv[c], "-q") && c + 1 < argc) {
      config.quality = ExUtilGetFloat(argv[++c], &parse_error);
-      use_lossless_preset = 0;   // disable -z option
+      use_lossless_preset = 0;  // disable -z option
    } else if (!strcmp(argv[c], "-z") && c + 1 < argc) {
      lossless_preset = ExUtilGetInt(argv[++c], 0, &parse_error);
      if (use_lossless_preset != 0) use_lossless_preset = 1;
@@ -892,11 +907,10 @@ int main(int argc, const char* argv[]) {
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetEncoderVersion();
      const int sharpyuv_version = SharpYuvGetVersion();
-      printf("%d.%d.%d\n",
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
-      printf("libsharpyuv: %d.%d.%d\n",
-             (sharpyuv_version >> 24) & 0xff, (sharpyuv_version >> 16) & 0xffff,
-             sharpyuv_version & 0xff);
+      printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
+             version & 0xff);
+      printf("libsharpyuv: %d.%d.%d\n", (sharpyuv_version >> 24) & 0xff,
+             (sharpyuv_version >> 16) & 0xffff, sharpyuv_version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-progress")) {
      show_progress = 1;
@@ -930,11 +944,11 @@ int main(int argc, const char* argv[]) {
        const char* option;
        int flag;
      } kTokens[] = {
-        { "all",  METADATA_ALL },
-        { "none", 0 },
-        { "exif", METADATA_EXIF },
-        { "icc",  METADATA_ICC },
-        { "xmp",  METADATA_XMP },
+          {"all", METADATA_ALL},    //
+          {"none", 0},              //
+          {"exif", METADATA_EXIF},  //
+          {"icc", METADATA_ICC},    //
+          {"xmp", METADATA_XMP},    //
      };
      const size_t kNumTokens = sizeof(kTokens) / sizeof(kTokens[0]);
      const char* start = argv[++c];
@@ -966,8 +980,9 @@ int main(int argc, const char* argv[]) {
 #ifdef HAVE_WINCODEC_H
      if (keep_metadata != 0 && keep_metadata != METADATA_ICC) {
        // TODO(jzern): remove when -metadata is supported on all platforms.
-        fprintf(stderr, "Warning: only ICC profile extraction is currently"
-                        " supported on this platform!\n");
+        fprintf(stderr,
+                "Warning: only ICC profile extraction is currently"
+                " supported on this platform!\n");
      }
 #endif
    } else if (!strcmp(argv[c], "-v")) {
@@ -1005,12 +1020,14 @@ int main(int argc, const char* argv[]) {
  // warning for such options.
  if (!quiet && config.lossless == 1) {
    if (config.target_size > 0 || config.target_PSNR > 0) {
-      fprintf(stderr, "Encoding for specified size or PSNR is not supported"
-                      " for lossless encoding. Ignoring such option(s)!\n");
+      fprintf(stderr,
+              "Encoding for specified size or PSNR is not supported"
+              " for lossless encoding. Ignoring such option(s)!\n");
    }
    if (config.partition_limit > 0) {
-      fprintf(stderr, "Partition limit option is not required for lossless"
-                      " encoding. Ignoring this option!\n");
+      fprintf(stderr,
+              "Partition limit option is not required for lossless"
+              " encoding. Ignoring this option!\n");
    }
  }
  // If a target size or PSNR was given, but somehow the -pass option was
@@ -1027,9 +1044,9 @@ int main(int argc, const char* argv[]) {
  // Read the input. We need to decide if we prefer ARGB or YUVA
  // samples, depending on the expected compression mode (this saves
  // some conversion steps).
-  picture.use_argb = (config.lossless || config.use_sharp_yuv ||
-                      config.preprocessing > 0 ||
-                      crop || (resize_w | resize_h) > 0);
+  picture.use_argb =
+      (config.lossless || config.use_sharp_yuv || config.preprocessing > 0 ||
+       crop || (resize_w | resize_h) > 0);
  if (verbose) {
    StopwatchReset(&stop_watch);
  }
@@ -1177,8 +1194,8 @@ int main(int argc, const char* argv[]) {
  }
  if (!WebPEncode(&config, &picture)) {
    fprintf(stderr, "Error! Cannot encode picture as WebP\n");
-    fprintf(stderr, "Error code: %d (%s)\n",
-            picture.error_code, kErrorMessages[picture.error_code]);
+    fprintf(stderr, "Error code: %d (%s)\n", picture.error_code,
+            kErrorMessages[picture.error_code]);
    goto Error;
  }
  if (verbose) {
@@ -1221,8 +1238,9 @@ int main(int argc, const char* argv[]) {
  // Write the YUV planes to a PGM file. Only available for lossy.
  if (dump_file) {
    if (picture.use_argb) {
-      fprintf(stderr, "Warning: can't dump file (-d option) "
-                      "in lossless mode.\n");
+      fprintf(stderr,
+              "Warning: can't dump file (-d option) "
+              "in lossless mode.\n");
    } else if (!DumpPicture(&picture, dump_file)) {
      WFPRINTF(stderr, "Warning, couldn't dump picture %s\n",
               (const W_CHAR*)dump_file);
@@ -1267,18 +1285,18 @@ int main(int argc, const char* argv[]) {
    if (!short_output && picture.extra_info_type > 0) {
      PrintMapInfo(&picture);
    }
-    if (print_distortion >= 0) {    // print distortion
-      static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" };
+    if (print_distortion >= 0) {  // print distortion
+      static const char* distortion_names[] = {"PSNR", "SSIM", "LSIM"};
      float values[5];
-      if (!WebPPictureDistortion(&picture, &original_picture,
-                                 print_distortion, values)) {
+      if (!WebPPictureDistortion(&picture, &original_picture, print_distortion,
+                                 values)) {
        fprintf(stderr, "Error while computing the distortion.\n");
        goto Error;
      }
      if (!short_output) {
        fprintf(stderr, "%s: ", distortion_names[print_distortion]);
-        fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f  Total:%.2f\n",
-                values[0], values[1], values[2], values[3], values[4]);
+        fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f  Total:%.2f\n", values[0],
+                values[1], values[2], values[3], values[4]);
      } else {
        fprintf(stderr, "%7d %.4f\n", picture.stats->coded_size, values[4]);
      }
@@ -1289,7 +1307,7 @@ int main(int argc, const char* argv[]) {
  }
  return_value = EXIT_SUCCESS;

- Error:
+Error:
  WebPMemoryWriterClear(&memory_writer);
  WebPFree(picture.extra_info);
  MetadataFree(&metadata);
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -35,14 +35,13 @@ static int quiet = 0;
 extern "C" {
 #endif

-extern void* VP8GetCPUInfo;   // opaque forward declaration.
+extern void* VP8GetCPUInfo;  // opaque forward declaration.

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif
 #endif  // WEBP_DLL

-
 static int SaveOutput(const WebPDecBuffer* const buffer,
                      WebPOutputFileFormat format, const char* const out_file) {
  const int use_stdout = (out_file != NULL) && !WSTRCMP(out_file, "-");
@@ -77,43 +76,42 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
 }

 static void Help(void) {
-  printf("Usage: dwebp in_file [options] [-o out_file]\n\n"
-         "Decodes the WebP image file to PNG format [Default].\n"
-         "Note: Animated WebP files are not supported.\n\n"
-         "Use following options to convert into alternate image formats:\n"
-         "  -pam ......... save the raw RGBA samples as a color PAM\n"
-         "  -ppm ......... save the raw RGB samples as a color PPM\n"
-         "  -bmp ......... save as uncompressed BMP format\n"
-         "  -tiff ........ save as uncompressed TIFF format\n"
-         "  -pgm ......... save the raw YUV samples as a grayscale PGM\n"
-         "                 file with IMC4 layout\n"
-         "  -yuv ......... save the raw YUV samples in flat layout\n"
-         "\n"
-         " Other options are:\n"
-         "  -version ..... print version number and exit\n"
-         "  -nofancy ..... don't use the fancy YUV420 upscaler\n"
-         "  -nofilter .... disable in-loop filtering\n"
-         "  -nodither .... disable dithering\n"
-         "  -dither <d> .. dithering strength (in 0..100)\n"
-         "  -alpha_dither  use alpha-plane dithering if needed\n"
-         "  -mt .......... use multi-threading\n"
-         "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
-         "  -resize <w> <h> ......... resize output (*after* any cropping)\n"
-         "  -flip ........ flip the output vertically\n"
-         "  -alpha ....... only save the alpha plane\n"
-         "  -incremental . use incremental decoding (useful for tests)\n"
-         "  -h ........... this help message\n"
-         "  -v ........... verbose (e.g. print encoding/decoding times)\n"
-         "  -quiet ....... quiet mode, don't print anything\n"
+  printf(
+      "Usage: dwebp in_file [options] [-o out_file]\n\n"
+      "Decodes the WebP image file to PNG format [Default].\n"
+      "Note: Animated WebP files are not supported.\n\n"
+      "Use following options to convert into alternate image formats:\n"
+      "  -pam ......... save the raw RGBA samples as a color PAM\n"
+      "  -ppm ......... save the raw RGB samples as a color PPM\n"
+      "  -bmp ......... save as uncompressed BMP format\n"
+      "  -tiff ........ save as uncompressed TIFF format\n"
+      "  -pgm ......... save the raw YUV samples as a grayscale PGM\n"
+      "                 file with IMC4 layout\n"
+      "  -yuv ......... save the raw YUV samples in flat layout\n"
+      "\n"
+      " Other options are:\n"
+      "  -version ..... print version number and exit\n"
+      "  -nofancy ..... don't use the fancy YUV420 upscaler\n"
+      "  -nofilter .... disable in-loop filtering\n"
+      "  -nodither .... disable dithering\n"
+      "  -dither <d> .. dithering strength (in 0..100)\n"
+      "  -alpha_dither  use alpha-plane dithering if needed\n"
+      "  -mt .......... use multi-threading\n"
+      "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
+      "  -resize <w> <h> ......... resize output (*after* any cropping)\n"
+      "  -flip ........ flip the output vertically\n"
+      "  -alpha ....... only save the alpha plane\n"
+      "  -incremental . use incremental decoding (useful for tests)\n"
+      "  -h ........... this help message\n"
+      "  -v ........... verbose (e.g. print encoding/decoding times)\n"
+      "  -quiet ....... quiet mode, don't print anything\n"
 #ifndef WEBP_DLL
-         "  -noasm ....... disable all assembly optimizations\n"
+      "  -noasm ....... disable all assembly optimizations\n"
 #endif
-        );
+  );
 }

-static const char* const kFormatType[] = {
-  "unspecified", "lossy", "lossless"
-};
+static const char* const kFormatType[] = {"unspecified", "lossy", "lossless"};

 static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
                                       WebPOutputFileFormat format,
@@ -130,23 +128,23 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
    h = config->options.crop_height;
  }
  if (format >= RGB && format <= rgbA_4444) {
-    const int bpp = (format == RGB || format == BGR) ? 3
-                  : (format == RGBA_4444 || format == rgbA_4444 ||
-                     format == RGB_565) ? 2
-                  : 4;
-    uint32_t stride = bpp * w + 7;   // <- just for exercising
+    const int bpp =
+        (format == RGB || format == BGR)                                    ? 3
+        : (format == RGBA_4444 || format == rgbA_4444 || format == RGB_565) ? 2
+                                                                            : 4;
+    uint32_t stride = bpp * w + 7;  // <- just for exercising
    external_buffer = (uint8_t*)WebPMalloc(stride * h);
    if (external_buffer == NULL) return NULL;
    output_buffer->u.RGBA.stride = stride;
    output_buffer->u.RGBA.size = stride * h;
    output_buffer->u.RGBA.rgba = external_buffer;
-  } else {    // YUV and YUVA
+  } else {  // YUV and YUVA
    const int has_alpha = WebPIsAlphaMode(output_buffer->colorspace);
    uint8_t* tmp;
    uint32_t stride = w + 3;
    uint32_t uv_stride = (w + 1) / 2 + 13;
-    uint32_t total_size = stride * h * (has_alpha ? 2 : 1)
-                        + 2 * uv_stride * (h + 1) / 2;
+    uint32_t total_size =
+        stride * h * (has_alpha ? 2 : 1) + 2 * uv_stride * (h + 1) / 2;
    assert(format >= YUV && format <= YUVA);
    external_buffer = (uint8_t*)WebPMalloc(total_size);
    if (external_buffer == NULL) return NULL;
@@ -228,8 +226,8 @@ int main(int argc, const char* argv[]) {
      quiet = 1;
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetDecoderVersion();
-      printf("%d.%d.%d\n",
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+      printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
+             version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else if (!strcmp(argv[c], "-pgm")) {
      format = PGM;
@@ -237,19 +235,32 @@ int main(int argc, const char* argv[]) {
      format = RAW_YUV;
    } else if (!strcmp(argv[c], "-pixel_format") && c < argc - 1) {
      const char* const fmt = argv[++c];
-      if      (!strcmp(fmt, "RGB"))  format = RGB;
-      else if (!strcmp(fmt, "RGBA")) format = RGBA;
-      else if (!strcmp(fmt, "BGR"))  format = BGR;
-      else if (!strcmp(fmt, "BGRA")) format = BGRA;
-      else if (!strcmp(fmt, "ARGB")) format = ARGB;
-      else if (!strcmp(fmt, "RGBA_4444")) format = RGBA_4444;
-      else if (!strcmp(fmt, "RGB_565")) format = RGB_565;
-      else if (!strcmp(fmt, "rgbA")) format = rgbA;
-      else if (!strcmp(fmt, "bgrA")) format = bgrA;
-      else if (!strcmp(fmt, "Argb")) format = Argb;
-      else if (!strcmp(fmt, "rgbA_4444")) format = rgbA_4444;
-      else if (!strcmp(fmt, "YUV"))  format = YUV;
-      else if (!strcmp(fmt, "YUVA")) format = YUVA;
+      if (!strcmp(fmt, "RGB"))
+        format = RGB;
+      else if (!strcmp(fmt, "RGBA"))
+        format = RGBA;
+      else if (!strcmp(fmt, "BGR"))
+        format = BGR;
+      else if (!strcmp(fmt, "BGRA"))
+        format = BGRA;
+      else if (!strcmp(fmt, "ARGB"))
+        format = ARGB;
+      else if (!strcmp(fmt, "RGBA_4444"))
+        format = RGBA_4444;
+      else if (!strcmp(fmt, "RGB_565"))
+        format = RGB_565;
+      else if (!strcmp(fmt, "rgbA"))
+        format = rgbA;
+      else if (!strcmp(fmt, "bgrA"))
+        format = bgrA;
+      else if (!strcmp(fmt, "Argb"))
+        format = Argb;
+      else if (!strcmp(fmt, "rgbA_4444"))
+        format = rgbA_4444;
+      else if (!strcmp(fmt, "YUV"))
+        format = YUV;
+      else if (!strcmp(fmt, "YUVA"))
+        format = YUVA;
      else {
        fprintf(stderr, "Can't parse pixel_format %s\n", fmt);
        parse_error = 1;
@@ -271,14 +282,14 @@ int main(int argc, const char* argv[]) {
          ExUtilGetInt(argv[++c], 0, &parse_error);
    } else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
      config.options.use_cropping = 1;
-      config.options.crop_left   = ExUtilGetInt(argv[++c], 0, &parse_error);
-      config.options.crop_top    = ExUtilGetInt(argv[++c], 0, &parse_error);
-      config.options.crop_width  = ExUtilGetInt(argv[++c], 0, &parse_error);
+      config.options.crop_left = ExUtilGetInt(argv[++c], 0, &parse_error);
+      config.options.crop_top = ExUtilGetInt(argv[++c], 0, &parse_error);
+      config.options.crop_width = ExUtilGetInt(argv[++c], 0, &parse_error);
      config.options.crop_height = ExUtilGetInt(argv[++c], 0, &parse_error);
    } else if ((!strcmp(argv[c], "-scale") || !strcmp(argv[c], "-resize")) &&
               c < argc - 2) {  // '-scale' is left for compatibility
      config.options.use_scaling = 1;
-      config.options.scaled_width  = ExUtilGetInt(argv[++c], 0, &parse_error);
+      config.options.scaled_width = ExUtilGetInt(argv[++c], 0, &parse_error);
      config.options.scaled_height = ExUtilGetInt(argv[++c], 0, &parse_error);
    } else if (!strcmp(argv[c], "-flip")) {
      config.options.flip = 1;
@@ -350,25 +361,52 @@ int main(int argc, const char* argv[]) {
        output_buffer->colorspace = MODE_YUVA;
        break;
      // forced modes:
-      case RGB: output_buffer->colorspace = MODE_RGB; break;
-      case RGBA: output_buffer->colorspace = MODE_RGBA; break;
-      case BGR: output_buffer->colorspace = MODE_BGR; break;
-      case BGRA: output_buffer->colorspace = MODE_BGRA; break;
-      case ARGB: output_buffer->colorspace = MODE_ARGB; break;
-      case RGBA_4444: output_buffer->colorspace = MODE_RGBA_4444; break;
-      case RGB_565: output_buffer->colorspace = MODE_RGB_565; break;
-      case rgbA: output_buffer->colorspace = MODE_rgbA; break;
-      case bgrA: output_buffer->colorspace = MODE_bgrA; break;
-      case Argb: output_buffer->colorspace = MODE_Argb; break;
-      case rgbA_4444: output_buffer->colorspace = MODE_rgbA_4444; break;
-      case YUV: output_buffer->colorspace = MODE_YUV; break;
-      case YUVA: output_buffer->colorspace = MODE_YUVA; break;
-      default: goto Exit;
+      case RGB:
+        output_buffer->colorspace = MODE_RGB;
+        break;
+      case RGBA:
+        output_buffer->colorspace = MODE_RGBA;
+        break;
+      case BGR:
+        output_buffer->colorspace = MODE_BGR;
+        break;
+      case BGRA:
+        output_buffer->colorspace = MODE_BGRA;
+        break;
+      case ARGB:
+        output_buffer->colorspace = MODE_ARGB;
+        break;
+      case RGBA_4444:
+        output_buffer->colorspace = MODE_RGBA_4444;
+        break;
+      case RGB_565:
+        output_buffer->colorspace = MODE_RGB_565;
+        break;
+      case rgbA:
+        output_buffer->colorspace = MODE_rgbA;
+        break;
+      case bgrA:
+        output_buffer->colorspace = MODE_bgrA;
+        break;
+      case Argb:
+        output_buffer->colorspace = MODE_Argb;
+        break;
+      case rgbA_4444:
+        output_buffer->colorspace = MODE_rgbA_4444;
+        break;
+      case YUV:
+        output_buffer->colorspace = MODE_YUV;
+        break;
+      case YUVA:
+        output_buffer->colorspace = MODE_YUVA;
+        break;
+      default:
+        goto Exit;
    }

    if (use_external_memory > 0 && format >= RGB) {
-      external_buffer = AllocateExternalBuffer(&config, format,
-                                               use_external_memory);
+      external_buffer =
+          AllocateExternalBuffer(&config, format, use_external_memory);
      if (external_buffer == NULL) goto Exit;
    }

@@ -410,11 +448,12 @@ int main(int argc, const char* argv[]) {
              output_buffer->width, output_buffer->height,
              bitstream->has_alpha ? " (with alpha)" : "",
              kFormatType[bitstream->format]);
-      fprintf(stderr, "Nothing written; "
-                      "use -o flag to save the result as e.g. PNG.\n");
+      fprintf(stderr,
+              "Nothing written; "
+              "use -o flag to save the result as e.g. PNG.\n");
    }
  }
- Exit:
+Exit:
  WebPFreeDecBuffer(output_buffer);
  WebPFree((void*)external_buffer);
  WebPFree((void*)data);
--- a/examples/example_util.c
+++ b/examples/example_util.c
@@ -46,7 +46,7 @@ int ExUtilGetInts(const char* v, int base, int max_output, int output[]) {
    if (error) return -1;
    output[n] = value;
    v = strchr(v, ',');
-    if (v != NULL) ++v;   // skip over the trailing ','
+    if (v != NULL) ++v;  // skip over the trailing ','
  }
  return n;
 }
@@ -110,8 +110,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
    }

    argc = 0;
-    for (cur = strtok((char*)args->argv_data.bytes, sep);
-         cur != NULL;
+    for (cur = strtok((char*)args->argv_data.bytes, sep); cur != NULL;
         cur = strtok(NULL, sep)) {
      if (argc == MAX_ARGC) {
        fprintf(stderr, "ERROR: Arguments limit %d reached\n", MAX_ARGC);
--- a/examples/example_util.h
+++ b/examples/example_util.h
@@ -13,8 +13,8 @@
 #ifndef WEBP_EXAMPLES_EXAMPLE_UTIL_H_
 #define WEBP_EXAMPLES_EXAMPLE_UTIL_H_

-#include "webp/types.h"
 #include "webp/mux_types.h"
+#include "webp/types.h"

 #ifdef __cplusplus
 extern "C" {
@@ -64,7 +64,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
 void ExUtilDeleteCommandLineArguments(CommandLineArguments* const args);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_EXAMPLES_EXAMPLE_UTIL_H_
--- a/examples/gif2webp.c
+++ b/examples/gif2webp.c
@@ -28,14 +28,15 @@
 #endif

 #include <gif_lib.h>
-#include "sharpyuv/sharpyuv.h"
-#include "webp/encode.h"
-#include "webp/mux.h"
+
 #include "../examples/example_util.h"
 #include "../imageio/imageio_util.h"
 #include "./gifdec.h"
 #include "./unicode.h"
 #include "./unicode_gif.h"
+#include "sharpyuv/sharpyuv.h"
+#include "webp/encode.h"
+#include "webp/mux.h"

 #if !defined(STDIN_FILENO)
 #define STDIN_FILENO 0
@@ -46,9 +47,8 @@
 static int transparent_index = GIF_INDEX_INVALID;  // Opaque by default.

 static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
-  "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
-  "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
-};
+    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
+    "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};

 static const char* ErrorString(WebPMuxError err) {
  assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
@@ -56,9 +56,9 @@ static const char* ErrorString(WebPMuxError err) {
 }

 enum {
-  METADATA_ICC  = (1 << 0),
-  METADATA_XMP  = (1 << 1),
-  METADATA_ALL  = METADATA_ICC | METADATA_XMP
+  METADATA_ICC = (1 << 0),
+  METADATA_XMP = (1 << 1),
+  METADATA_ALL = METADATA_ICC | METADATA_XMP
 };

 //------------------------------------------------------------------------------
@@ -69,20 +69,25 @@ static void Help(void) {
  printf("Options:\n");
  printf("  -h / -help ............. this help\n");
  printf("  -lossy ................. encode image using lossy compression\n");
-  printf("  -mixed ................. for each frame in the image, pick lossy\n"
-         "                           or lossless compression heuristically\n");
-  printf("  -near_lossless <int> ... use near-lossless image preprocessing\n"
-         "                           (0..100=off), default=100\n");
-  printf("  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
-                                    "conversion\n"
-         "                           (lossy only)\n");
+  printf(
+      "  -mixed ................. for each frame in the image, pick lossy\n"
+      "                           or lossless compression heuristically\n");
+  printf(
+      "  -near_lossless <int> ... use near-lossless image preprocessing\n"
+      "                           (0..100=off), default=100\n");
+  printf(
+      "  -sharp_yuv ............. use sharper (and slower) RGB->YUV "
+      "conversion\n"
+      "                           (lossy only)\n");
  printf("  -q <float> ............. quality factor (0:small..100:big)\n");
-  printf("  -m <int> ............... compression method (0=fast, 6=slowest), "
-         "default=4\n");
-  printf("  -min_size .............. minimize output size (default:off)\n"
-         "                           lossless compression by default; can be\n"
-         "                           combined with -q, -m, -lossy or -mixed\n"
-         "                           options\n");
+  printf(
+      "  -m <int> ............... compression method (0=fast, 6=slowest), "
+      "default=4\n");
+  printf(
+      "  -min_size .............. minimize output size (default:off)\n"
+      "                           lossless compression by default; can be\n"
+      "                           combined with -q, -m, -lossy or -mixed\n"
+      "                           options\n");
  printf("  -kmin <int> ............ min distance between key frames\n");
  printf("  -kmax <int> ............ max distance between key frames\n");
  printf("  -f <int> ............... filter strength (0=off..100)\n");
@@ -109,21 +114,21 @@ int main(int argc, const char* argv[]) {
  int gif_error = GIF_ERROR;
  WebPMuxError err = WEBP_MUX_OK;
  int ok = 0;
-  const W_CHAR* in_file = NULL, *out_file = NULL;
+  const W_CHAR *in_file = NULL, *out_file = NULL;
  GifFileType* gif = NULL;
  int frame_duration = 0;
  int frame_timestamp = 0;
  GIFDisposeMethod orig_dispose = GIF_DISPOSE_NONE;

-  WebPPicture frame;                // Frame rectangle only (not disposed).
-  WebPPicture curr_canvas;          // Not disposed.
-  WebPPicture prev_canvas;          // Disposed.
+  WebPPicture frame;        // Frame rectangle only (not disposed).
+  WebPPicture curr_canvas;  // Not disposed.
+  WebPPicture prev_canvas;  // Disposed.

  WebPAnimEncoder* enc = NULL;
  WebPAnimEncoderOptions enc_options;
  WebPConfig config;

-  int frame_number = 0;     // Whether we are processing the first frame.
+  int frame_number = 0;  // Whether we are processing the first frame.
  int done;
  int c;
  int quiet = 0;
@@ -131,7 +136,7 @@ int main(int argc, const char* argv[]) {

  int keep_metadata = METADATA_XMP;  // ICC not output by default.
  WebPData icc_data;
-  int stored_icc = 0;         // Whether we have already stored an ICC profile.
+  int stored_icc = 0;  // Whether we have already stored an ICC profile.
  WebPData xmp_data;
  int stored_xmp = 0;         // Whether we have already stored an XMP profile.
  int loop_count = 0;         // default: infinite
@@ -198,10 +203,10 @@ int main(int argc, const char* argv[]) {
        const char* option;
        int flag;
      } kTokens[] = {
-        { "all",  METADATA_ALL },
-        { "none", 0 },
-        { "icc",  METADATA_ICC },
-        { "xmp",  METADATA_XMP },
+          {"all", METADATA_ALL},
+          {"none", 0},
+          {"icc", METADATA_ICC},
+          {"xmp", METADATA_XMP},
      };
      const size_t kNumTokens = sizeof(kTokens) / sizeof(*kTokens);
      const char* start = argv[++c];
@@ -319,8 +324,8 @@ int main(int argc, const char* argv[]) {
              goto End;
            }
            if (verbose) {
-              printf("Fixed canvas screen dimension to: %d x %d\n",
-                     gif->SWidth, gif->SHeight);
+              printf("Fixed canvas screen dimension to: %d x %d\n", gif->SWidth,
+                     gif->SHeight);
            }
          }
          // Allocate current buffer.
@@ -418,7 +423,7 @@ int main(int argc, const char* argv[]) {
            break;
          }
          case APPLICATION_EXT_FUNC_CODE: {
-            if (data[0] != 11) break;    // Chunk is too short
+            if (data[0] != 11) break;  // Chunk is too short
            if (!memcmp(data + 1, "NETSCAPE2.0", 11) ||
                !memcmp(data + 1, "ANIMEXTS1.0", 11)) {
              if (!GIFReadLoopCount(gif, &data, &loop_count)) {
@@ -529,7 +534,7 @@ int main(int argc, const char* argv[]) {
      }
    }

-    if (stored_icc) {   // Add ICCP chunk.
+    if (stored_icc) {  // Add ICCP chunk.
      err = WebPMuxSetChunk(mux, "ICCP", &icc_data, 1);
      if (verbose) {
        fprintf(stderr, "ICC size: %d\n", (int)icc_data.size);
@@ -541,7 +546,7 @@ int main(int argc, const char* argv[]) {
      }
    }

-    if (stored_xmp) {   // Add XMP chunk.
+    if (stored_xmp) {  // Add XMP chunk.
      err = WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1);
      if (verbose) {
        fprintf(stderr, "XMP size: %d\n", (int)xmp_data.size);
@@ -555,8 +560,10 @@ int main(int argc, const char* argv[]) {

    err = WebPMuxAssemble(mux, &webp_data);
    if (err != WEBP_MUX_OK) {
-      fprintf(stderr, "ERROR (%s): Could not assemble when re-muxing to add "
-              "loop count/metadata.\n", ErrorString(err));
+      fprintf(stderr,
+              "ERROR (%s): Could not assemble when re-muxing to add "
+              "loop count/metadata.\n",
+              ErrorString(err));
      goto End;
    }
  }
@@ -569,8 +576,7 @@ int main(int argc, const char* argv[]) {
    }
    if (!quiet) {
      if (!WSTRCMP(out_file, "-")) {
-        fprintf(stderr, "Saved %d bytes to STDIO\n",
-                (int)webp_data.size);
+        fprintf(stderr, "Saved %d bytes to STDIO\n", (int)webp_data.size);
      } else {
        WFPRINTF(stderr, "Saved output file (%d bytes): %s\n",
                 (int)webp_data.size, out_file);
@@ -578,8 +584,10 @@ int main(int argc, const char* argv[]) {
    }
  } else {
    if (!quiet) {
-      fprintf(stderr, "Nothing written; use -o flag to save the result "
-                      "(%d bytes).\n", (int)webp_data.size);
+      fprintf(stderr,
+              "Nothing written; use -o flag to save the result "
+              "(%d bytes).\n",
+              (int)webp_data.size);
    }
  }

@@ -587,7 +595,7 @@ int main(int argc, const char* argv[]) {
  ok = 1;
  gif_error = GIF_OK;

- End:
+End:
  WebPDataClear(&icc_data);
  WebPDataClear(&xmp_data);
  WebPMuxDelete(mux);
@@ -601,7 +609,7 @@ int main(int argc, const char* argv[]) {
    GIFDisplayError(gif, gif_error);
  }
  if (gif != NULL) {
-#if LOCAL_GIF_PREREQ(5,1)
+#if LOCAL_GIF_PREREQ(5, 1)
    DGifCloseFile(gif, &gif_error);
 #else
    DGifCloseFile(gif);
--- a/examples/gifdec.c
+++ b/examples/gifdec.c
@@ -19,22 +19,21 @@
 #include <string.h>

 #include "webp/encode.h"
-#include "webp/types.h"
 #include "webp/mux_types.h"
+#include "webp/types.h"

 #define GIF_TRANSPARENT_COLOR 0x00000000u
-#define GIF_WHITE_COLOR       0xffffffffu
-#define GIF_TRANSPARENT_MASK  0x01
-#define GIF_DISPOSE_MASK      0x07
-#define GIF_DISPOSE_SHIFT     2
+#define GIF_WHITE_COLOR 0xffffffffu
+#define GIF_TRANSPARENT_MASK 0x01
+#define GIF_DISPOSE_MASK 0x07
+#define GIF_DISPOSE_SHIFT 2

 // from utils/utils.h
 #ifdef __cplusplus
 extern "C" {
 #endif
-extern void WebPCopyPlane(const uint8_t* src, int src_stride,
-                          uint8_t* dst, int dst_stride,
-                          int width, int height);
+extern void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
+                          int dst_stride, int width, int height);
 extern void WebPCopyPixels(const WebPPicture* const src,
                           WebPPicture* const dst);
 #ifdef __cplusplus
@@ -47,18 +46,16 @@ void GIFGetBackgroundColor(const ColorMapObject* const color_map,
  if (transparent_index != GIF_INDEX_INVALID &&
      bgcolor_index == transparent_index) {
    *bgcolor = GIF_TRANSPARENT_COLOR;  // Special case.
-  } else if (color_map == NULL || color_map->Colors == NULL
-             || bgcolor_index >= color_map->ColorCount) {
+  } else if (color_map == NULL || color_map->Colors == NULL ||
+             bgcolor_index >= color_map->ColorCount) {
    *bgcolor = GIF_WHITE_COLOR;
    fprintf(stderr,
            "GIF decode warning: invalid background color index. Assuming "
            "white background.\n");
  } else {
    const GifColorType color = color_map->Colors[bgcolor_index];
-    *bgcolor = (0xffu       << 24)
-             | (color.Red   << 16)
-             | (color.Green <<  8)
-             | (color.Blue  <<  0);
+    *bgcolor = (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
+               (color.Blue << 0);
  }
 }

@@ -117,9 +114,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  const GifImageDesc* const image_desc = &gif->Image;
  uint32_t* dst = NULL;
  uint8_t* tmp = NULL;
-  const GIFFrameRect rect = {
-      image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height
-  };
+  const GIFFrameRect rect = {image_desc->Left, image_desc->Top,
+                             image_desc->Width, image_desc->Height};
  const uint64_t memory_needed = 4 * rect.width * (uint64_t)rect.height;
  int ok = 0;
  *gif_rect = rect;
@@ -130,8 +126,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  }

  // Use a view for the sub-picture:
-  if (!WebPPictureView(picture, rect.x_offset, rect.y_offset,
-                       rect.width, rect.height, &sub_image)) {
+  if (!WebPPictureView(picture, rect.x_offset, rect.y_offset, rect.width,
+                       rect.height, &sub_image)) {
    fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n",
            rect.width, rect.height, rect.x_offset, rect.y_offset);
    return 0;
@@ -143,8 +139,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,

  if (image_desc->Interlace) {  // Interlaced image.
    // We need 4 passes, with the following offsets and jumps.
-    const int interlace_offsets[] = { 0, 4, 2, 1 };
-    const int interlace_jumps[]   = { 8, 8, 4, 2 };
+    const int interlace_offsets[] = {0, 4, 2, 1};
+    const int interlace_jumps[] = {8, 8, 4, 2};
    int pass;
    for (pass = 0; pass < 4; ++pass) {
      const size_t stride = (size_t)sub_image.argb_stride;
@@ -166,7 +162,7 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
  }
  ok = 1;

- End:
+End:
  if (!ok) picture->error_code = sub_image.error_code;
  WebPPictureFree(&sub_image);
  WebPFree(tmp);
@@ -184,7 +180,7 @@ int GIFReadLoopCount(GifFileType* const gif, GifByteType** const buf,
    return 0;  // Loop count sub-block missing.
  }
  if ((*buf)[0] < 3 || (*buf)[1] != 1) {
-    return 0;   // wrong size/marker
+    return 0;  // wrong size/marker
  }
  *loop_count = (*buf)[2] | ((*buf)[3] << 8);
  return 1;
@@ -220,8 +216,7 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
    if (tmp == NULL) {
      return 0;
    }
-    memcpy((void*)(tmp + metadata->size),
-           subblock.bytes, subblock.size);
+    memcpy((void*)(tmp + metadata->size), subblock.bytes, subblock.size);
    metadata->bytes = tmp;
    metadata->size += subblock.size;
  }
@@ -235,8 +230,8 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
  return 1;
 }

-static void ClearRectangle(WebPPicture* const picture,
-                           int left, int top, int width, int height) {
+static void ClearRectangle(WebPPicture* const picture, int left, int top,
+                           int width, int height) {
  int i, j;
  const size_t stride = picture->argb_stride;
  uint32_t* dst = picture->argb + top * stride + left;
@@ -247,8 +242,8 @@ static void ClearRectangle(WebPPicture* const picture,

 void GIFClearPic(WebPPicture* const pic, const GIFFrameRect* const rect) {
  if (rect != NULL) {
-    ClearRectangle(pic, rect->x_offset, rect->y_offset,
-                   rect->width, rect->height);
+    ClearRectangle(pic, rect->x_offset, rect->y_offset, rect->width,
+                   rect->height);
  } else {
    ClearRectangle(pic, 0, 0, pic->width, pic->height);
  }
@@ -266,15 +261,14 @@ void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
    GIFClearPic(curr_canvas, rect);
  } else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
    const size_t src_stride = prev_canvas->argb_stride;
-    const uint32_t* const src = prev_canvas->argb + rect->x_offset
-                              + rect->y_offset * src_stride;
+    const uint32_t* const src =
+        prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
    const size_t dst_stride = curr_canvas->argb_stride;
-    uint32_t* const dst = curr_canvas->argb + rect->x_offset
-                        + rect->y_offset * dst_stride;
+    uint32_t* const dst =
+        curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
    assert(prev_canvas != NULL);
-    WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride),
-                  (uint8_t*)dst, (int)(4 * dst_stride),
-                  4 * rect->width, rect->height);
+    WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride), (uint8_t*)dst,
+                  (int)(4 * dst_stride), 4 * rect->width, rect->height);
  }
 }

@@ -297,11 +291,11 @@ void GIFBlendFrames(const WebPPicture* const src,

 void GIFDisplayError(const GifFileType* const gif, int gif_error) {
  // libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
-#if LOCAL_GIF_PREREQ(4,2)
-#if LOCAL_GIF_PREREQ(5,0)
+#if LOCAL_GIF_PREREQ(4, 2)
+#if LOCAL_GIF_PREREQ(5, 0)
  // Static string actually, hence the const char* cast.
-  const char* error_str = (const char*)GifErrorString(
-      (gif == NULL) ? gif_error : gif->Error);
+  const char* error_str =
+      (const char*)GifErrorString((gif == NULL) ? gif_error : gif->Error);
 #else
  const char* error_str = (const char*)GifErrorString();
  (void)gif;
@@ -319,7 +313,8 @@ void GIFDisplayError(const GifFileType* const gif, int gif_error) {
 #else  // !WEBP_HAVE_GIF

 static void ErrorGIFNotAvailable(void) {
-  fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
+  fprintf(stderr,
+          "GIF support not compiled. Please install the libgif-dev "
          "package before building.\n");
 }

--- a/examples/gifdec.h
+++ b/examples/gifdec.h
@@ -30,12 +30,11 @@ extern "C" {

 // GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
 #if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
-# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
-# define LOCAL_GIF_PREREQ(maj, min) \
-    (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
 #else
-# define LOCAL_GIF_VERSION 0
-# define LOCAL_GIF_PREREQ(maj, min) 0
+#define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_PREREQ(maj, min) 0
 #endif

 #define GIF_INDEX_INVALID (-1)
@@ -111,7 +110,7 @@ void GIFCopyPixels(const struct WebPPicture* const src,
                   struct WebPPicture* const dst);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_EXAMPLES_GIFDEC_H_
--- a/examples/img2webp.c
+++ b/examples/img2webp.c
@@ -43,16 +43,20 @@ static void Help(void) {

  printf("File-level options (only used at the start of compression):\n");
  printf(" -min_size ............ minimize size\n");
-  printf(" -kmax <int> .......... maximum number of frame between key-frames\n"
-         "                        (0=only keyframes)\n");
-  printf(" -kmin <int> .......... minimum number of frame between key-frames\n"
-         "                        (0=disable key-frames altogether)\n");
+  printf(
+      " -kmax <int> .......... maximum number of frame between key-frames\n"
+      "                        (0=only keyframes)\n");
+  printf(
+      " -kmin <int> .......... minimum number of frame between key-frames\n"
+      "                        (0=disable key-frames altogether)\n");
  printf(" -mixed ............... use mixed lossy/lossless automatic mode\n");
-  printf(" -near_lossless <int> . use near-lossless image preprocessing\n"
-         "                        (0..100=off), default=100\n");
-  printf(" -sharp_yuv ........... use sharper (and slower) RGB->YUV "
-                                  "conversion\n                        "
-                                  "(lossy only)\n");
+  printf(
+      " -near_lossless <int> . use near-lossless image preprocessing\n"
+      "                        (0..100=off), default=100\n");
+  printf(
+      " -sharp_yuv ........... use sharper (and slower) RGB->YUV "
+      "conversion\n                        "
+      "(lossy only)\n");
  printf(" -loop <int> .......... loop count (default: 0, = infinite loop)\n");
  printf(" -v ................... verbose mode\n");
  printf(" -h ................... this help\n");
@@ -64,20 +68,25 @@ static void Help(void) {
  printf(" -lossless ............ use lossless mode (default)\n");
  printf(" -lossy ............... use lossy mode\n");
  printf(" -q <float> ........... quality\n");
-  printf(" -m <int> ............. compression method (0=fast, 6=slowest), "
-         "default=4\n");
-  printf(" -exact, -noexact ..... preserve or alter RGB values in transparent "
-                                  "area\n"
-         "                        (default: -noexact, may cause artifacts\n"
-         "                                  with lossy animations)\n");
+  printf(
+      " -m <int> ............. compression method (0=fast, 6=slowest), "
+      "default=4\n");
+  printf(
+      " -exact, -noexact ..... preserve or alter RGB values in transparent "
+      "area\n"
+      "                        (default: -noexact, may cause artifacts\n"
+      "                                  with lossy animations)\n");

  printf("\n");
-  printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
-         "                  -d 80 in2.tiff -o out.webp\n");
-  printf("\nNote: if a single file name is passed as the argument, the "
-         "arguments will be\n");
-  printf("tokenized from this file. The file name must not start with "
-         "the character '-'.\n");
+  printf(
+      "example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
+      "                  -d 80 in2.tiff -o out.webp\n");
+  printf(
+      "\nNote: if a single file name is passed as the argument, the "
+      "arguments will be\n");
+  printf(
+      "tokenized from this file. The file name must not start with "
+      "the character '-'.\n");
  printf("\nSupported input formats:\n  %s\n",
         WebPGetEnabledInputFileFormats());
 }
@@ -127,7 +136,7 @@ static int SetLoopCount(int loop_count, WebPData* const webp_data) {
    ok = (err == WEBP_MUX_OK);
  }

- End:
+End:
  WebPMuxDelete(mux);
  if (!ok) {
    fprintf(stderr, "Error during loop-count setting\n");
@@ -166,8 +175,7 @@ int main(int argc, const char* argv[]) {
  argv = cmd_args.argv;

  WebPDataInit(&webp_data);
-  if (!WebPAnimEncoderOptionsInit(&anim_config) ||
-      !WebPConfigInit(&config) ||
+  if (!WebPAnimEncoderOptionsInit(&anim_config) || !WebPConfigInit(&config) ||
      !WebPPictureInit(&pic)) {
    fprintf(stderr, "Library version mismatch!\n");
    ok = 0;
@@ -225,7 +233,7 @@ int main(int argc, const char* argv[]) {
      }
      ok = !parse_error;
      if (!ok) goto End;
-      argv[c] = NULL;   // mark option as 'parsed' during 1st pass
+      argv[c] = NULL;  // mark option as 'parsed' during 1st pass
    } else {
      have_input |= 1;
    }
@@ -242,7 +250,7 @@ int main(int argc, const char* argv[]) {
  config.lossless = 1;
  for (c = 0; ok && c < argc; ++c) {
    if (argv[c] == NULL) continue;
-    if (argv[c][0] == '-') {    // parse local options
+    if (argv[c][0] == '-') {  // parse local options
      int parse_error = 0;
      if (!strcmp(argv[c], "-lossy")) {
        if (!anim_config.allow_mixed) config.lossless = 0;
@@ -263,7 +271,7 @@ int main(int argc, const char* argv[]) {
      } else if (!strcmp(argv[c], "-noexact")) {
        config.exact = 0;
      } else {
-        parse_error = 1;   // shouldn't be here.
+        parse_error = 1;  // shouldn't be here.
        fprintf(stderr, "Unknown option [%s]\n", argv[c]);
      }
      ok = !parse_error;
@@ -286,7 +294,7 @@ int main(int argc, const char* argv[]) {
    if (!ok) goto End;

    if (enc == NULL) {
-      width  = pic.width;
+      width = pic.width;
      height = pic.height;
      enc = WebPAnimEncoderNew(width, height, &anim_config);
      ok = (enc != NULL);
@@ -298,8 +306,9 @@ int main(int argc, const char* argv[]) {
    if (ok) {
      ok = (width == pic.width && height == pic.height);
      if (!ok) {
-        fprintf(stderr, "Frame #%d dimension mismatched! "
-                        "Got %d x %d. Was expecting %d x %d.\n",
+        fprintf(stderr,
+                "Frame #%d dimension mismatched! "
+                "Got %d x %d. Was expecting %d x %d.\n",
                pic_num, pic.width, pic.height, width, height);
      }
    }
@@ -314,8 +323,8 @@ int main(int argc, const char* argv[]) {
    if (!ok) goto End;

    if (verbose) {
-      WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n",
-               pic_num, timestamp_ms, GET_WARGV_SHIFTED(argv, c));
+      WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n", pic_num,
+               timestamp_ms, GET_WARGV_SHIFTED(argv, c));
    }
    timestamp_ms += duration;
    ++pic_num;
@@ -323,8 +332,10 @@ int main(int argc, const char* argv[]) {

  for (c = last_input_index + 1; c < argc; ++c) {
    if (argv[c] != NULL) {
-      fprintf(stderr, "Warning: unused option [%s]!"
-                      " Frame options go before the input frame.\n", argv[c]);
+      fprintf(stderr,
+              "Warning: unused option [%s]!"
+              " Frame options go before the input frame.\n",
+              argv[c]);
    }
  }

@@ -335,7 +346,7 @@ int main(int argc, const char* argv[]) {
    fprintf(stderr, "Error during final animation assembly.\n");
  }

- End:
+End:
  // free resources
  WebPAnimEncoderDelete(enc);

@@ -353,8 +364,8 @@ int main(int argc, const char* argv[]) {
  }

  if (ok) {
-    fprintf(stderr, "[%d frames, %u bytes].\n",
-            pic_num, (unsigned int)webp_data.size);
+    fprintf(stderr, "[%d frames, %u bytes].\n", pic_num,
+            (unsigned int)webp_data.size);
  }
  WebPDataClear(&webp_data);
  ExUtilDeleteCommandLineArguments(&cmd_args);
--- a/examples/stopwatch.h
+++ b/examples/stopwatch.h
@@ -28,17 +28,13 @@ static WEBP_INLINE void StopwatchReset(Stopwatch* watch) {
 static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
  const LARGE_INTEGER old_value = *watch;
  LARGE_INTEGER freq;
-  if (!QueryPerformanceCounter(watch))
-    return 0.0;
-  if (!QueryPerformanceFrequency(&freq))
-    return 0.0;
-  if (freq.QuadPart == 0)
-    return 0.0;
+  if (!QueryPerformanceCounter(watch)) return 0.0;
+  if (!QueryPerformanceFrequency(&freq)) return 0.0;
+  if (freq.QuadPart == 0) return 0.0;
  return (watch->QuadPart - old_value.QuadPart) / (double)freq.QuadPart;
 }

-
-#else    /* !_WIN32 */
+#else                /* !_WIN32 */
 #include <string.h>  // memcpy
 #include <sys/time.h>

@@ -58,6 +54,6 @@ static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
  return delta_sec + delta_usec / 1000000.0;
 }

-#endif   /* _WIN32 */
+#endif /* _WIN32 */

 #endif  // WEBP_EXAMPLES_STOPWATCH_H_
--- a/examples/unicode.h
+++ b/examples/unicode.h
@@ -25,20 +25,20 @@

 #include <fcntl.h>
 #include <io.h>
+#include <shellapi.h>
 #include <wchar.h>
 #include <windows.h>
-#include <shellapi.h>

 // Create a wchar_t array containing Unicode parameters.
-#define INIT_WARGV(ARGC, ARGV)                                                \
-  int wargc;                                                                  \
-  const W_CHAR** const wargv =                                                \
-      (const W_CHAR**)CommandLineToArgvW(GetCommandLineW(), &wargc);          \
-  do {                                                                        \
-    if (wargv == NULL || wargc != (ARGC)) {                                   \
-      fprintf(stderr, "Error: Unable to get Unicode arguments.\n");           \
-      FREE_WARGV_AND_RETURN(-1);                                              \
-    }                                                                         \
+#define INIT_WARGV(ARGC, ARGV)                                       \
+  int wargc;                                                         \
+  const W_CHAR** const wargv =                                       \
+      (const W_CHAR**)CommandLineToArgvW(GetCommandLineW(), &wargc); \
+  do {                                                               \
+    if (wargv == NULL || wargc != (ARGC)) {                          \
+      fprintf(stderr, "Error: Unable to get Unicode arguments.\n");  \
+      FREE_WARGV_AND_RETURN(-1);                                     \
+    }                                                                \
  } while (0)

 // Use this to get a Unicode argument (e.g. file path).
--- a/examples/unicode_gif.h
+++ b/examples/unicode_gif.h
@@ -28,6 +28,7 @@

 #include <gif_lib.h>
 #include <string.h>
+
 #include "./gifdec.h"

 #if !defined(STDIN_FILENO)
--- a/examples/vwebp.c
+++ b/examples/vwebp.c
@@ -38,12 +38,11 @@
 #include <qcms.h>
 #endif

-#include "webp/decode.h"
-#include "webp/demux.h"
-
 #include "../examples/example_util.h"
 #include "../imageio/imageio_util.h"
 #include "./unicode.h"
+#include "webp/decode.h"
+#include "webp/demux.h"

 #if defined(_MSC_VER) && _MSC_VER < 1900
 #define snprintf _snprintf
@@ -135,9 +134,8 @@ static int ApplyColorProfile(const WebPData* const profile,
  }

  qcms_profile_precache_output_transform(output_profile);
-  transform = qcms_transform_create(input_profile, input_type,
-                                    output_profile, output_type,
-                                    intent);
+  transform = qcms_transform_create(input_profile, input_type, output_profile,
+                                    output_type, intent);
  if (transform == NULL) {
    fprintf(stderr, "Error creating color transform!\n");
    goto Error;
@@ -149,7 +147,7 @@ static int ApplyColorProfile(const WebPData* const profile,
  }
  ok = 1;

- Error:
+Error:
  if (input_profile != NULL) qcms_profile_release(input_profile);
  if (output_profile != NULL) qcms_profile_release(output_profile);
  if (transform != NULL) qcms_transform_release(transform);
@@ -164,7 +162,7 @@ static int ApplyColorProfile(const WebPData* const profile,
 //------------------------------------------------------------------------------
 // File decoding

-static int Decode(void) {   // Fills kParams.curr_frame
+static int Decode(void) {  // Fills kParams.curr_frame
  const WebPIterator* const curr = &kParams.curr_frame;
  WebPDecoderConfig* const config = &kParams.config;
  WebPDecBuffer* const output_buffer = &config->output;
@@ -172,8 +170,8 @@ static int Decode(void) {   // Fills kParams.curr_frame

  ClearPreviousPic();
  output_buffer->colorspace = MODE_RGBA;
-  ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size,
-                   config) == VP8_STATUS_OK);
+  ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size, config) ==
+        VP8_STATUS_OK);
  if (!ok) {
    fprintf(stderr, "Decoding of frame #%d failed!\n", curr->frame_num);
  } else {
@@ -341,8 +339,7 @@ static void DrawBackground(void) {
    glPushMatrix();
    glLoadIdentity();
    glColor4f(GetColorf(kParams.bg_color, 16),  // BGRA from spec
-              GetColorf(kParams.bg_color, 8),
-              GetColorf(kParams.bg_color, 0),
+              GetColorf(kParams.bg_color, 8), GetColorf(kParams.bg_color, 0),
              GetColorf(kParams.bg_color, 24));
    glRecti(-1, -1, +1, +1);
    glPopMatrix();
@@ -402,8 +399,7 @@ static void HandleDisplay(void) {

  *prev = *curr;

-  glDrawPixels(pic->width, pic->height,
-               GL_RGBA, GL_UNSIGNED_BYTE,
+  glDrawPixels(pic->width, pic->height, GL_RGBA, GL_UNSIGNED_BYTE,
               (GLvoid*)pic->u.RGBA.rgba);
  if (kParams.print_info) {
    char tmp[32];
@@ -417,8 +413,8 @@ static void HandleDisplay(void) {
    glRasterPos2f(-0.95f, 0.80f);
    PrintString(tmp);
    if (curr->x_offset != 0 || curr->y_offset != 0) {
-      snprintf(tmp, sizeof(tmp), " (offset:%d,%d)",
-               curr->x_offset, curr->y_offset);
+      snprintf(tmp, sizeof(tmp), " (offset:%d,%d)", curr->x_offset,
+               curr->y_offset);
      glRasterPos2f(-0.95f, 0.70f);
      PrintString(tmp);
    }
@@ -571,8 +567,8 @@ int main(int argc, char* argv[]) {
    FREE_WARGV_AND_RETURN(EXIT_FAILURE);
  }

-  if (!ImgIoUtilReadFile(kParams.file_name,
-                         &kParams.data.bytes, &kParams.data.size)) {
+  if (!ImgIoUtilReadFile(kParams.file_name, &kParams.data.bytes,
+                         &kParams.data.size)) {
    goto Error;
  }

@@ -603,7 +599,8 @@ int main(int argc, char* argv[]) {
    if (!WebPDemuxGetChunk(kParams.dmux, "ICCP", 1, &kParams.iccp)) goto Error;
    printf("VP8X: Found color profile\n");
 #else
-    fprintf(stderr, "Warning: color profile present, but qcms is unavailable!\n"
+    fprintf(stderr,
+            "Warning: color profile present, but qcms is unavailable!\n"
            "Build libqcms from Mozilla or Chromium and define WEBP_HAVE_QCMS "
            "before building.\n");
 #endif
@@ -614,8 +611,8 @@ int main(int argc, char* argv[]) {
  kParams.has_animation = (curr->num_frames > 1);
  kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT);
  kParams.bg_color = WebPDemuxGetI(kParams.dmux, WEBP_FF_BACKGROUND_COLOR);
-  printf("VP8X: Found %d images in file (loop count = %d)\n",
-         curr->num_frames, kParams.loop_count);
+  printf("VP8X: Found %d images in file (loop count = %d)\n", curr->num_frames,
+         kParams.loop_count);

  // Decode first frame
  if (!Decode()) goto Error;
@@ -645,12 +642,12 @@ int main(int argc, char* argv[]) {
  ClearParams();
  FREE_WARGV_AND_RETURN(EXIT_SUCCESS);

- Error:
+Error:
  ClearParams();
  FREE_WARGV_AND_RETURN(EXIT_FAILURE);
 }

-#else   // !WEBP_HAVE_GL
+#else  // !WEBP_HAVE_GL

 int main(int argc, const char* argv[]) {
  fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);
--- a/examples/webpinfo.c
+++ b/examples/webpinfo.c
@@ -47,25 +47,13 @@
    ++webp_info->num_warnings;                   \
  } while (0)

-static const char* const kFormats[3] = {
-  "Unknown",
-  "Lossy",
-  "Lossless"
-};
+static const char* const kFormats[3] = {"Unknown", "Lossy", "Lossless"};

 static const char* const kLosslessTransforms[4] = {
-  "Predictor",
-  "Cross Color",
-  "Subtract Green",
-  "Color Indexing"
-};
+    "Predictor", "Cross Color", "Subtract Green", "Color Indexing"};

-static const char* const kAlphaFilterMethods[4] = {
-  "None",
-  "Horizontal",
-  "Vertical",
-  "Gradient"
-};
+static const char* const kAlphaFilterMethods[4] = {"None", "Horizontal",
+                                                   "Vertical", "Gradient"};

 typedef enum {
  WEBP_INFO_OK = 0,
@@ -129,15 +117,15 @@ static void WebPInfoInit(WebPInfo* const webp_info) {
 }

 static const uint32_t kWebPChunkTags[CHUNK_TYPES] = {
-  MKFOURCC('V', 'P', '8', ' '),
-  MKFOURCC('V', 'P', '8', 'L'),
-  MKFOURCC('V', 'P', '8', 'X'),
-  MKFOURCC('A', 'L', 'P', 'H'),
-  MKFOURCC('A', 'N', 'I', 'M'),
-  MKFOURCC('A', 'N', 'M', 'F'),
-  MKFOURCC('I', 'C', 'C', 'P'),
-  MKFOURCC('E', 'X', 'I', 'F'),
-  MKFOURCC('X', 'M', 'P', ' '),
+    MKFOURCC('V', 'P', '8', ' '),  //
+    MKFOURCC('V', 'P', '8', 'L'),  //
+    MKFOURCC('V', 'P', '8', 'X'),  //
+    MKFOURCC('A', 'L', 'P', 'H'),  //
+    MKFOURCC('A', 'N', 'I', 'M'),  //
+    MKFOURCC('A', 'N', 'M', 'F'),  //
+    MKFOURCC('I', 'C', 'C', 'P'),  //
+    MKFOURCC('E', 'X', 'I', 'F'),  //
+    MKFOURCC('X', 'M', 'P', ' '),  //
 };

 // -----------------------------------------------------------------------------
@@ -200,9 +188,7 @@ static const uint8_t* GetBuffer(MemBuffer* const mem) {
  return mem->buf + mem->start;
 }

-static void Skip(MemBuffer* const mem, size_t size) {
-  mem->start += size;
-}
+static void Skip(MemBuffer* const mem, size_t size) { mem->start += size; }

 static uint32_t ReadMemBufLE32(MemBuffer* const mem) {
  const uint8_t* const data = mem->buf + mem->start;
@@ -266,9 +252,10 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
    int update_map, update_data;
    GET_BITS(update_map, 1);
    GET_BITS(update_data, 1);
-    printf("  Update map:       %d\n"
-           "  Update data:      %d\n",
-           update_map, update_data);
+    printf(
+        "  Update map:       %d\n"
+        "  Update data:      %d\n",
+        update_map, update_data);
    if (update_data) {
      int i, a_delta;
      int quantizer[4] = {0, 0, 0, 0};
@@ -298,8 +285,8 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
        GET_BITS(bit, 1);
        if (bit) GET_BITS(prob_segment[i], 8);
      }
-      printf("  Prob segment:     %d %d %d\n",
-             prob_segment[0], prob_segment[1], prob_segment[2]);
+      printf("  Prob segment:     %d %d %d\n", prob_segment[0], prob_segment[1],
+             prob_segment[2]);
    }
  }
  return WEBP_INFO_OK;
@@ -371,12 +358,13 @@ static WebPInfoStatus ParseLossyHeader(const ChunkData* const chunk_data,
      LOG_ERROR("Invalid lossy bitstream signature.");
      return WEBP_INFO_BITSTREAM_ERROR;
    }
-    printf("  Width:            %d\n"
-           "  X scale:          %d\n"
-           "  Height:           %d\n"
-           "  Y scale:          %d\n",
-           ((data[4] << 8) | data[3]) & 0x3fff, data[4] >> 6,
-           ((data[6] << 8) | data[5]) & 0x3fff, data[6] >> 6);
+    printf(
+        "  Width:            %d\n"
+        "  X scale:          %d\n"
+        "  Height:           %d\n"
+        "  Y scale:          %d\n",
+        ((data[4] << 8) | data[3]) & 0x3fff, data[4] >> 6,
+        ((data[6] << 8) | data[5]) & 0x3fff, data[6] >> 6);
    data += 7;
    data_size -= 7;
  } else {
@@ -479,7 +467,7 @@ static int LLGetBits(const uint8_t* const data, size_t data_size, size_t nb,
 static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
                                             const uint8_t* const data,
                                             size_t data_size,
-                                             uint64_t* const  bit_pos) {
+                                             uint64_t* const bit_pos) {
  int use_transform, block_size, n_colors;
  LL_GET_BITS(use_transform, 1);
  printf("  Use transform:    %s\n", use_transform ? "Yes" : "No");
@@ -499,7 +487,8 @@ static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
        n_colors += 1;
        printf("  No. of colors:    %d\n", n_colors);
        break;
-      default: break;
+      default:
+        break;
    }
  }
  return WEBP_INFO_OK;
@@ -556,8 +545,8 @@ static WebPInfoStatus ParseAlphaHeader(const ChunkData* const chunk_data,
    const int pre_processing = (data[0] >> 4) & 0x03;
    const int reserved_bits = (data[0] >> 6) & 0x03;
    printf("  Compression:      %d\n", compression_method);
-    printf("  Filter:           %s (%d)\n",
-           kAlphaFilterMethods[filter], filter);
+    printf("  Filter:           %s (%d)\n", kAlphaFilterMethods[filter],
+           filter);
    printf("  Pre-processing:   %d\n", pre_processing);
    if (compression_method > ALPHA_LOSSLESS_COMPRESSION) {
      LOG_ERROR("Invalid Alpha compression method.");
@@ -642,7 +631,7 @@ static WebPInfoStatus ParseChunk(const WebPInfo* const webp_info,
      LOG_ERROR("Size of chunk payload is over limit.");
      return WEBP_INFO_INVALID_PARAM;
    }
-    if (payload_size_padded > MemDataSize(mem)){
+    if (payload_size_padded > MemDataSize(mem)) {
      LOG_ERROR("Truncated data detected when parsing chunk payload.");
      return WEBP_INFO_TRUNCATED_DATA;
    }
@@ -695,8 +684,8 @@ static WebPInfoStatus ProcessVP8XChunk(const ChunkData* const chunk_data,
           (webp_info->feature_flags & EXIF_FLAG) != 0,
           (webp_info->feature_flags & XMP_FLAG) != 0,
           (webp_info->feature_flags & ANIMATION_FLAG) != 0);
-    printf("  Canvas size %d x %d\n",
-           webp_info->canvas_width, webp_info->canvas_height);
+    printf("  Canvas size %d x %d\n", webp_info->canvas_width,
+           webp_info->canvas_height);
  }
  if (webp_info->canvas_width > MAX_CANVAS_SIZE) {
    LOG_WARN("Canvas width is out of range in VP8X chunk.");
@@ -727,10 +716,8 @@ static WebPInfoStatus ProcessANIMChunk(const ChunkData* const chunk_data,
  ++webp_info->chunk_counts[CHUNK_ANIM];
  if (!webp_info->quiet) {
    printf("  Background color:(ARGB) %02x %02x %02x %02x\n",
-           (webp_info->bgcolor >> 24) & 0xff,
-           (webp_info->bgcolor >> 16) & 0xff,
-           (webp_info->bgcolor >> 8) & 0xff,
-           webp_info->bgcolor & 0xff);
+           (webp_info->bgcolor >> 24) & 0xff, (webp_info->bgcolor >> 16) & 0xff,
+           (webp_info->bgcolor >> 8) & 0xff, webp_info->bgcolor & 0xff);
    printf("  Loop count      : %d\n", webp_info->loop_count);
  }
  if (webp_info->loop_count > MAX_LOOP_COUNT) {
@@ -765,9 +752,10 @@ static WebPInfoStatus ProcessANMFChunk(const ChunkData* const chunk_data,
  blend = (temp >> 1) & 1;
  ++webp_info->chunk_counts[CHUNK_ANMF];
  if (!webp_info->quiet) {
-    printf("  Offset_X: %d\n  Offset_Y: %d\n  Width: %d\n  Height: %d\n"
-           "  Duration: %d\n  Dispose: %d\n  Blend: %d\n",
-           offset_x, offset_y, width, height, duration, dispose, blend);
+    printf(
+        "  Offset_X: %d\n  Offset_Y: %d\n  Width: %d\n  Height: %d\n"
+        "  Duration: %d\n  Dispose: %d\n  Blend: %d\n",
+        offset_x, offset_y, width, height, duration, dispose, blend);
  }
  if (duration > MAX_DURATION) {
    LOG_ERROR("Invalid duration parameter in ANMF chunk.");
@@ -804,10 +792,11 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
  }
  if (!webp_info->quiet) {
    assert(features.format >= 0 && features.format <= 2);
-    printf("  Width: %d\n  Height: %d\n  Alpha: %d\n  Animation: %d\n"
-           "  Format: %s (%d)\n",
-           features.width, features.height, features.has_alpha,
-           features.has_animation, kFormats[features.format], features.format);
+    printf(
+        "  Width: %d\n  Height: %d\n  Alpha: %d\n  Animation: %d\n"
+        "  Format: %s (%d)\n",
+        features.width, features.height, features.has_alpha,
+        features.has_animation, kFormats[features.format], features.format);
  }
  if (webp_info->is_processing_anim_frame) {
    ++webp_info->anmf_subchunk_counts[chunk_data->id == CHUNK_VP8 ? 0 : 1];
@@ -831,8 +820,7 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
      LOG_ERROR("Multiple VP8/VP8L chunks detected.");
      return WEBP_INFO_PARSE_ERROR;
    }
-    if (chunk_data->id == CHUNK_VP8L &&
-        webp_info->chunk_counts[CHUNK_ALPHA]) {
+    if (chunk_data->id == CHUNK_VP8L && webp_info->chunk_counts[CHUNK_ALPHA]) {
      LOG_WARN("Both VP8L and ALPH chunks are detected.");
    }
    if (webp_info->chunk_counts[CHUNK_ANIM] ||
@@ -882,8 +870,9 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
    webp_info->seen_alpha_subchunk = 1;

    if (webp_info->seen_image_subchunk) {
-      LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
-                "in an ANMF chunk.");
+      LOG_ERROR(
+          "ALPHA sub-chunk detected after VP8 sub-chunk "
+          "in an ANMF chunk.");
      return WEBP_INFO_PARSE_ERROR;
    }
  } else {
@@ -938,7 +927,7 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
  if (chunk_data->id == CHUNK_UNKNOWN) {
    char error_message[50];
    snprintf(error_message, 50, "Unknown chunk at offset %6d, length %6d",
-            (int)chunk_data->offset, (int)chunk_data->size);
+             (int)chunk_data->offset, (int)chunk_data->size);
    LOG_WARN(error_message);
  } else {
    if (!webp_info->quiet) {
@@ -949,9 +938,8 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
               ((fourcc << 8) & 0xff0000) | (fourcc << 24);
 #endif
      memcpy(tag, &fourcc, sizeof(tag));
-      printf("Chunk %c%c%c%c at offset %6d, length %6d\n",
-             tag[0], tag[1], tag[2], tag[3], (int)chunk_data->offset,
-             (int)chunk_data->size);
+      printf("Chunk %c%c%c%c at offset %6d, length %6d\n", tag[0], tag[1],
+             tag[2], tag[3], (int)chunk_data->offset, (int)chunk_data->size);
    }
  }
  switch (id) {
@@ -1048,7 +1036,7 @@ static WebPInfoStatus Validate(WebPInfo* const webp_info) {
      return WEBP_INFO_PARSE_ERROR;
    }
    if (animation && (!webp_info->chunk_counts[CHUNK_ANIM] ||
-        !webp_info->chunk_counts[CHUNK_ANMF])) {
+                      !webp_info->chunk_counts[CHUNK_ANMF])) {
      LOG_ERROR("No ANIM/ANMF chunk detected in animation file.");
      return WEBP_INFO_PARSE_ERROR;
    }
@@ -1060,14 +1048,14 @@ static void ShowSummary(const WebPInfo* const webp_info) {
  int i;
  printf("Summary:\n");
  printf("Number of frames: %d\n", webp_info->num_frames);
-  printf("Chunk type  :  VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
+  printf(
+      "Chunk type  :  VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
      "EXIF  XMP\n");
  printf("Chunk counts: ");
  for (i = 0; i < CHUNK_TYPES; ++i) {
    printf("%4d ", webp_info->chunk_counts[i]);
    if (i == CHUNK_ANMF) {
-      printf("%4d %4d %4d  ",
-             webp_info->anmf_subchunk_counts[0],
+      printf("%4d %4d %4d  ", webp_info->anmf_subchunk_counts[0],
             webp_info->anmf_subchunk_counts[1],
             webp_info->anmf_subchunk_counts[2]);
    }
@@ -1097,7 +1085,7 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
  //  Final check.
  webp_info_status = Validate(webp_info);

- Error:
+Error:
  if (!webp_info->quiet) {
    if (webp_info_status == WEBP_INFO_OK) {
      printf("No error detected.\n");
@@ -1112,15 +1100,16 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
 }

 static void Help(void) {
-  printf("Usage: webpinfo [options] in_files\n"
-         "Note: there could be multiple input files;\n"
-         "      options must come before input files.\n"
-         "Options:\n"
-         "  -version ........... Print version number and exit.\n"
-         "  -quiet ............. Do not show chunk parsing information.\n"
-         "  -diag .............. Show parsing error diagnosis.\n"
-         "  -summary ........... Show chunk stats summary.\n"
-         "  -bitstream_info .... Parse bitstream header.\n");
+  printf(
+      "Usage: webpinfo [options] in_files\n"
+      "Note: there could be multiple input files;\n"
+      "      options must come before input files.\n"
+      "Options:\n"
+      "  -version ........... Print version number and exit.\n"
+      "  -quiet ............. Do not show chunk parsing information.\n"
+      "  -diag .............. Show parsing error diagnosis.\n"
+      "  -summary ........... Show chunk stats summary.\n"
+      "  -bitstream_info .... Parse bitstream header.\n");
 }

 // Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
@@ -1153,8 +1142,8 @@ int main(int argc, const char* argv[]) {
      parse_bitstream = 1;
    } else if (!strcmp(argv[c], "-version")) {
      const int version = WebPGetDecoderVersion();
-      printf("WebP Decoder version: %d.%d.%d\n",
-             (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+      printf("WebP Decoder version: %d.%d.%d\n", (version >> 16) & 0xff,
+             (version >> 8) & 0xff, version & 0xff);
      FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
    } else {  // Assume the remaining are all input files.
      break;
--- a/examples/webpmux.c
+++ b/examples/webpmux.c
@@ -106,14 +106,11 @@ typedef enum {
  LAST_FEATURE
 } FeatureType;

-static const char* const kFourccList[LAST_FEATURE] = {
-  NULL, "EXIF", "XMP ", "ICCP", "ANMF"
-};
+static const char* const kFourccList[LAST_FEATURE] = {NULL, "EXIF", "XMP ",
+                                                      "ICCP", "ANMF"};

 static const char* const kDescriptions[LAST_FEATURE] = {
-  NULL, "EXIF metadata", "XMP metadata", "ICC profile",
-  "Animation frame"
-};
+    NULL, "EXIF metadata", "XMP metadata", "ICC profile", "Animation frame"};

 typedef struct {
  CommandLineArguments cmd_args;
@@ -143,50 +140,49 @@ static int CountOccurrences(const CommandLineArguments* const args,
 }

 static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
-  "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
-  "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
-};
+    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
+    "WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};

 static const char* ErrorString(WebPMuxError err) {
  assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
  return kErrorMessages[-err];
 }

-#define RETURN_IF_ERROR(ERR_MSG)                                     \
-  do {                                                               \
-    if (err != WEBP_MUX_OK) {                                        \
-      fprintf(stderr, ERR_MSG);                                      \
-      return err;                                                    \
-    }                                                                \
+#define RETURN_IF_ERROR(ERR_MSG) \
+  do {                           \
+    if (err != WEBP_MUX_OK) {    \
+      fprintf(stderr, ERR_MSG);  \
+      return err;                \
+    }                            \
  } while (0)

-#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2)          \
-  do {                                                               \
-    if (err != WEBP_MUX_OK) {                                        \
-      fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2);            \
-      return err;                                                    \
-    }                                                                \
+#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2) \
+  do {                                                      \
+    if (err != WEBP_MUX_OK) {                               \
+      fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2);   \
+      return err;                                           \
+    }                                                       \
  } while (0)

-#define ERROR_GOTO1(ERR_MSG, LABEL)                                  \
-  do {                                                               \
-    fprintf(stderr, ERR_MSG);                                        \
-    ok = 0;                                                          \
-    goto LABEL;                                                      \
+#define ERROR_GOTO1(ERR_MSG, LABEL) \
+  do {                              \
+    fprintf(stderr, ERR_MSG);       \
+    ok = 0;                         \
+    goto LABEL;                     \
  } while (0)

-#define ERROR_GOTO2(ERR_MSG, FORMAT_STR, LABEL)                      \
-  do {                                                               \
-    fprintf(stderr, ERR_MSG, FORMAT_STR);                            \
-    ok = 0;                                                          \
-    goto LABEL;                                                      \
+#define ERROR_GOTO2(ERR_MSG, FORMAT_STR, LABEL) \
+  do {                                          \
+    fprintf(stderr, ERR_MSG, FORMAT_STR);       \
+    ok = 0;                                     \
+    goto LABEL;                                 \
  } while (0)

-#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL)        \
-  do {                                                               \
-    fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2);              \
-    ok = 0;                                                          \
-    goto LABEL;                                                      \
+#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL) \
+  do {                                                        \
+    fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2);       \
+    ok = 0;                                                   \
+    goto LABEL;                                               \
  } while (0)

 static WebPMuxError DisplayInfo(const WebPMux* mux) {
@@ -208,10 +204,10 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
  // Print the features present.
  printf("Features present:");
  if (flag & ANIMATION_FLAG) printf(" animation");
-  if (flag & ICCP_FLAG)      printf(" ICC profile");
-  if (flag & EXIF_FLAG)      printf(" EXIF metadata");
-  if (flag & XMP_FLAG)       printf(" XMP metadata");
-  if (flag & ALPHA_FLAG)     printf(" transparency");
+  if (flag & ICCP_FLAG) printf(" ICC profile");
+  if (flag & EXIF_FLAG) printf(" EXIF metadata");
+  if (flag & XMP_FLAG) printf(" XMP metadata");
+  if (flag & ALPHA_FLAG) printf(" transparency");
  printf("\n");

  if (flag & ANIMATION_FLAG) {
@@ -222,8 +218,8 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
    WebPMuxAnimParams params;
    err = WebPMuxGetAnimationParams(mux, &params);
    assert(err == WEBP_MUX_OK);
-    printf("Background color : 0x%.8X  Loop Count : %d\n",
-           params.bgcolor, params.loop_count);
+    printf("Background color : 0x%.8X  Loop Count : %d\n", params.bgcolor,
+           params.loop_count);

    err = WebPMuxNumChunks(mux, id, &nFrames);
    assert(err == WEBP_MUX_OK);
@@ -255,9 +251,9 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
            printf("%8d %10s %5s ", frame.duration, dispose, blend);
          }
          printf("%10d %11s\n", (int)frame.bitstream.size,
-                 (features.format == 1) ? "lossy" :
-                 (features.format == 2) ? "lossless" :
-                                          "undefined");
+                 (features.format == 1)   ? "lossy"
+                 : (features.format == 2) ? "lossless"
+                                          : "undefined");
        }
        WebPDataClear(&frame.bitstream);
        RETURN_IF_ERROR3("Failed to retrieve %s#%d\n", type_str, i);
@@ -305,8 +301,9 @@ static void PrintHelp(void) {
  printf("       webpmux -duration DURATION_OPTIONS [-duration ...]\n");
  printf("               INPUT -o OUTPUT\n");
  printf("       webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT\n");
-  printf("       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
-         "\n");
+  printf(
+      "       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
+      "\n");
  printf("               [-bgcolor BACKGROUND_COLOR] -o OUTPUT\n");
  printf("       webpmux -info INPUT\n");
  printf("       webpmux [-h|-help]\n");
@@ -360,8 +357,9 @@ static void PrintHelp(void) {
  printf("             'di' is the pause duration before next frame,\n");
  printf("             'xi','yi' specify the image offset for this frame,\n");
  printf("             'mi' is the dispose method for this frame (0 or 1),\n");
-  printf("             'bi' is the blending method for this frame (+b or -b)"
-         "\n");
+  printf(
+      "             'bi' is the blending method for this frame (+b or -b)"
+      "\n");

  printf("\n");
  printf("LOOP_COUNT:\n");
@@ -372,27 +370,33 @@ static void PrintHelp(void) {
  printf("BACKGROUND_COLOR:\n");
  printf(" Background color of the canvas.\n");
  printf("  A,R,G,B\n");
-  printf("  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
-         "specifying\n");
-  printf("            the Alpha, Red, Green and Blue component values "
-         "respectively\n");
+  printf(
+      "  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
+      "specifying\n");
+  printf(
+      "            the Alpha, Red, Green and Blue component values "
+      "respectively\n");
  printf("            [Default: 255,255,255,255]\n");

  printf("\nINPUT & OUTPUT are in WebP format.\n");

  printf("\nNote: The nature of EXIF, XMP and ICC data is not checked");
  printf(" and is assumed to be\nvalid.\n");
-  printf("\nNote: if a single file name is passed as the argument, the "
-         "arguments will be\n");
-  printf("tokenized from this file. The file name must not start with "
-         "the character '-'.\n");
+  printf(
+      "\nNote: if a single file name is passed as the argument, the "
+      "arguments will be\n");
+  printf(
+      "tokenized from this file. The file name must not start with "
+      "the character '-'.\n");
 }

 static void WarnAboutOddOffset(const WebPMuxFrameInfo* const info) {
  if ((info->x_offset | info->y_offset) & 1) {
-    fprintf(stderr, "Warning: odd offsets will be snapped to even values"
-            " (%d, %d) -> (%d, %d)\n", info->x_offset, info->y_offset,
-            info->x_offset & ~1, info->y_offset & ~1);
+    fprintf(stderr,
+            "Warning: odd offsets will be snapped to even values"
+            " (%d, %d) -> (%d, %d)\n",
+            info->x_offset, info->y_offset, info->x_offset & ~1,
+            info->y_offset & ~1);
  }
 }

@@ -420,8 +424,8 @@ static int WriteData(const char* filename, const WebPData* const webpdata) {
  if (fwrite(webpdata->bytes, webpdata->size, 1, fout) != 1) {
    WFPRINTF(stderr, "Error writing file %s!\n", (const W_CHAR*)filename);
  } else {
-    WFPRINTF(stderr, "Saved file %s (%d bytes)\n",
-             (const W_CHAR*)filename, (int)webpdata->size);
+    WFPRINTF(stderr, "Saved file %s (%d bytes)\n", (const W_CHAR*)filename,
+             (int)webpdata->size);
    ok = 1;
  }
  if (fout != stdout) fclose(fout);
@@ -454,8 +458,8 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
  if (err == WEBP_MUX_OK) {
    err = WebPMuxSetAnimationParams(new_mux, &p);
    if (err != WEBP_MUX_OK) {
-      ERROR_GOTO2("Error (%s) handling animation params.\n",
-                  ErrorString(err), End);
+      ERROR_GOTO2("Error (%s) handling animation params.\n", ErrorString(err),
+                  End);
    }
  } else {
    /* it might not be an animation. Just keep moving. */
@@ -473,7 +477,7 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
    }
  }

- End:
+End:
  if (!ok) {
    WebPMuxDelete(new_mux);
    new_mux = NULL;
@@ -511,8 +515,7 @@ static int ParseFrameArgs(const char* args, WebPMuxFrameInfo* const info) {

  if (blend_method != 'b') return 0;
  if (plus_minus != '-' && plus_minus != '+') return 0;
-  info->blend_method =
-      (plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
+  info->blend_method = (plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
  return 1;
 }

@@ -584,8 +587,10 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
  }

  if ((num_frame_args == 0) && (num_loop_args + num_bgcolor_args > 0)) {
-    ERROR_GOTO1("ERROR: Loop count and background color are relevant only in "
-                "case of animation.\n", ErrValidate);
+    ERROR_GOTO1(
+        "ERROR: Loop count and background color are relevant only in "
+        "case of animation.\n",
+        ErrValidate);
  }
  if (num_durations_args > 0 && num_frame_args != 0) {
    ERROR_GOTO1("ERROR: Can not combine -duration and -frame commands.\n",
@@ -603,7 +608,7 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
    *num_feature_args = num_frame_args + num_loop_args + num_bgcolor_args;
  }

- ErrValidate:
+ErrValidate:
  return ok;
 }

@@ -611,12 +616,12 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,

 #define FEATURETYPE_IS_NIL (config->type == NIL_FEATURE)

-#define CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL)                              \
-  do {                                                                   \
-    if (argc < i + (NUM)) {                                              \
-      fprintf(stderr, "ERROR: Too few arguments for '%s'.\n", argv[i]);  \
-      goto LABEL;                                                        \
-    }                                                                    \
+#define CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL)                             \
+  do {                                                                  \
+    if (argc < i + (NUM)) {                                             \
+      fprintf(stderr, "ERROR: Too few arguments for '%s'.\n", argv[i]); \
+      goto LABEL;                                                       \
+    }                                                                   \
  } while (0)

 #define CHECK_NUM_ARGS_AT_MOST(NUM, LABEL)                               \
@@ -627,10 +632,10 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
    }                                                                    \
  } while (0)

-#define CHECK_NUM_ARGS_EXACTLY(NUM, LABEL)                               \
-  do {                                                                   \
-    CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL);                                 \
-    CHECK_NUM_ARGS_AT_MOST(NUM, LABEL);                                  \
+#define CHECK_NUM_ARGS_EXACTLY(NUM, LABEL) \
+  do {                                     \
+    CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL);   \
+    CHECK_NUM_ARGS_AT_MOST(NUM, LABEL);    \
  } while (0)

 // Parses command-line arguments to fill up config object. Also performs some
@@ -739,8 +744,8 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
        exit(0);
      } else if (!strcmp(argv[i], "-version")) {
        const int version = WebPGetMuxVersion();
-        printf("%d.%d.%d\n",
-               (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+        printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
+               version & 0xff);
        DeleteConfig(config);
        LOCAL_FREE((W_CHAR**)unicode_argv);
        exit(0);
@@ -771,8 +776,9 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
        if (!strcmp(argv[i], "icc") || !strcmp(argv[i], "exif") ||
            !strcmp(argv[i], "xmp")) {
          if (FEATURETYPE_IS_NIL) {
-            config->type = (!strcmp(argv[i], "icc")) ? FEATURE_ICCP :
-                (!strcmp(argv[i], "exif")) ? FEATURE_EXIF : FEATURE_XMP;
+            config->type = (!strcmp(argv[i], "icc"))    ? FEATURE_ICCP
+                           : (!strcmp(argv[i], "exif")) ? FEATURE_EXIF
+                                                        : FEATURE_XMP;
          } else {
            ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
          }
@@ -826,7 +832,7 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
      }
    }
  }
- ErrParse:
+ErrParse:
  return ok;
 }

@@ -858,7 +864,7 @@ static int ValidateConfig(Config* const config) {
    ERROR_GOTO1("ERROR: No output file specified.\n", ErrValidate2);
  }

- ErrValidate2:
+ErrValidate2:
  return ok;
 }

@@ -889,7 +895,7 @@ static int InitializeConfig(int argc, const char* argv[], Config* const config,
    ERROR_GOTO1("Exiting due to command-line parsing error.\n", Err1);
  }

- Err1:
+Err1:
  return ok;
 }

@@ -921,8 +927,8 @@ static int GetFrame(const WebPMux* mux, const Config* config) {
  err = WebPMuxGetFrame(mux, num, &info);
  if (err == WEBP_MUX_OK && info.id != id) err = WEBP_MUX_NOT_FOUND;
  if (err != WEBP_MUX_OK) {
-    ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n",
-                ErrorString(err), num, ErrGet);
+    ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n", ErrorString(err), num,
+                ErrGet);
  }

  mux_single = WebPMuxNew();
@@ -939,7 +945,7 @@ static int GetFrame(const WebPMux* mux, const Config* config) {

  ok = WriteWebP(mux_single, config->output);

- ErrGet:
+ErrGet:
  WebPDataClear(&info.bitstream);
  WebPMuxDelete(mux_single);
  return ok && !parse_error;
@@ -966,8 +972,8 @@ static int Process(const Config* config) {
        case FEATURE_XMP:
          err = WebPMuxGetChunk(mux, kFourccList[config->type], &chunk);
          if (err != WEBP_MUX_OK) {
-            ERROR_GOTO3("ERROR (%s): Could not get the %s.\n",
-                        ErrorString(err), kDescriptions[config->type], Err2);
+            ERROR_GOTO3("ERROR (%s): Could not get the %s.\n", ErrorString(err),
+                        kDescriptions[config->type], Err2);
          }
          ok = WriteData(config->output, &chunk);
          break;
@@ -982,7 +988,7 @@ static int Process(const Config* config) {
      switch (config->type) {
        case FEATURE_ANMF: {
          int i;
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          mux = WebPMuxNew();
          if (mux == NULL) {
            ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n",
@@ -1008,8 +1014,10 @@ static int Process(const Config* config) {
                  // Note: This is only a 'necessary' condition for loop_count
                  // to be valid. The 'sufficient' conditioned in checked in
                  // WebPMuxSetAnimationParams() method called later.
-                  ERROR_GOTO1("ERROR: Loop count must be in the range 0 to "
-                              "65535.\n", Err2);
+                  ERROR_GOTO1(
+                      "ERROR: Loop count must be in the range 0 to "
+                      "65535.\n",
+                      Err2);
                }
                ok = !parse_error;
                if (!ok) goto Err2;
@@ -1031,8 +1039,10 @@ static int Process(const Config* config) {
                err = WebPMuxPushFrame(mux, &frame, 1);
                WebPDataClear(&frame.bitstream);
                if (err != WEBP_MUX_OK) {
-                  ERROR_GOTO3("ERROR (%s): Could not add a frame at index %d."
-                              "\n", ErrorString(err), i, Err2);
+                  ERROR_GOTO3(
+                      "ERROR (%s): Could not add a frame at index %d."
+                      "\n",
+                      ErrorString(err), i, Err2);
                }
                break;
              }
@@ -1060,13 +1070,13 @@ static int Process(const Config* config) {
          err = WebPMuxSetChunk(mux, kFourccList[config->type], &chunk, 1);
          WebPDataClear(&chunk);
          if (err != WEBP_MUX_OK) {
-            ERROR_GOTO3("ERROR (%s): Could not set the %s.\n",
-                        ErrorString(err), kDescriptions[config->type], Err2);
+            ERROR_GOTO3("ERROR (%s): Could not set the %s.\n", ErrorString(err),
+                        kDescriptions[config->type], Err2);
          }
          break;
        }
        case FEATURE_LOOP: {
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          int parse_error = 0;
          const int loop_count =
              ExUtilGetInt(config->args[0].params, 10, &parse_error);
@@ -1091,12 +1101,11 @@ static int Process(const Config* config) {
          break;
        }
        case FEATURE_BGCOLOR: {
-          WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
+          WebPMuxAnimParams params = {0xFFFFFFFF, 0};
          uint32_t bgcolor;
          ok = ParseBgcolorArgs(config->args[0].params, &bgcolor);
          if (!ok) {
-            ERROR_GOTO1("ERROR: Could not parse the background color.\n",
-                        Err2);
+            ERROR_GOTO1("ERROR: Could not parse the background color.\n", Err2);
          }
          ok = CreateMux(config->input, &mux);
          if (!ok) goto Err2;
@@ -1132,8 +1141,9 @@ static int Process(const Config* config) {
        ERROR_GOTO1("ERROR: can not parse the number of frames.\n", Err2);
      }
      if (num_frames == 0) {
-        fprintf(stderr, "Doesn't look like the source is animated. "
-                        "Skipping duration setting.\n");
+        fprintf(stderr,
+                "Doesn't look like the source is animated. "
+                "Skipping duration setting.\n");
        ok = WriteWebP(mux, config->output);
        if (!ok) goto Err2;
      } else {
@@ -1150,8 +1160,8 @@ static int Process(const Config* config) {
          int k;
          int args[3];
          int duration, start, end;
-          const int nb_args = ExUtilGetInts(config->args[i].params,
-                                            10, 3, args);
+          const int nb_args =
+              ExUtilGetInts(config->args[i].params, 10, 3, args);
          ok = (nb_args >= 1);
          if (!ok) goto Err3;
          duration = args[0];
@@ -1159,7 +1169,7 @@ static int Process(const Config* config) {
            ERROR_GOTO1("ERROR: duration must be strictly positive.\n", Err3);
          }

-          if (nb_args == 1) {   // only duration is present -> use full interval
+          if (nb_args == 1) {  // only duration is present -> use full interval
            start = 1;
            end = num_frames;
          } else {
@@ -1198,7 +1208,7 @@ static int Process(const Config* config) {
        mux = new_mux;  // transfer for the WebPMuxDelete() call
        new_mux = NULL;

- Err3:
+      Err3:
        WebPFree(durations);
        WebPMuxDelete(new_mux);
        if (!ok) goto Err2;
@@ -1212,8 +1222,8 @@ static int Process(const Config* config) {
          config->type == FEATURE_XMP) {
        err = WebPMuxDeleteChunk(mux, kFourccList[config->type]);
        if (err != WEBP_MUX_OK) {
-          ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n",
-                      ErrorString(err), kDescriptions[config->type], Err2);
+          ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n", ErrorString(err),
+                      kDescriptions[config->type], Err2);
        }
      } else {
        ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
@@ -1234,7 +1244,7 @@ static int Process(const Config* config) {
    }
  }

- Err2:
+Err2:
  WebPMuxDelete(mux);
  return ok;
 }
--- a/extras/extras.c
+++ b/extras/extras.c
@@ -45,7 +45,7 @@ int WebPImportGray(const uint8_t* gray_data, WebPPicture* pic) {
  uv_width = (width + 1) >> 1;
  for (y = 0; y < pic->height; ++y) {
    memcpy(pic->y + y * pic->y_stride, gray_data, width);
-    gray_data += width;    // <- we could use some 'data_stride' here if needed
+    gray_data += width;  // <- we could use some 'data_stride' here if needed
    if ((y & 1) == 0) {
      memset(pic->u + (y >> 1) * pic->uv_stride, 128, uv_width);
      memset(pic->v + (y >> 1) * pic->uv_stride, 128, uv_width);
--- a/extras/extras.h
+++ b/extras/extras.h
@@ -22,7 +22,7 @@ extern "C" {
 #include "sharpyuv/sharpyuv.h"
 #include "webp/encode.h"

-#define WEBP_EXTRAS_ABI_VERSION 0x0003    // MAJOR(8b) + MINOR(8b)
+#define WEBP_EXTRAS_ABI_VERSION 0x0003  // MAJOR(8b) + MINOR(8b)

 //------------------------------------------------------------------------------

@@ -49,10 +49,10 @@ WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
 // MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
 // will be discarded. Returns 'false' in case of error, or if indexed[] contains
 // invalid indices.
-WEBP_EXTERN int
-WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
-                          const uint32_t palette[], int palette_size,
-                          WebPPicture* pic);
+WEBP_EXTERN int WebPImportColorMappedARGB(const uint8_t* indexed,
+                                          int indexed_stride,
+                                          const uint32_t palette[],
+                                          int palette_size, WebPPicture* pic);

 // Convert the ARGB content of 'pic' from associated to unassociated.
 // 'pic' can be for instance the result of calling of some WebPPictureImportXXX
@@ -98,15 +98,17 @@ WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
 //     currently supported.
 // width, height: width and height of the image in pixels
 // Returns 0 on failure.
-WEBP_EXTERN int SharpYuvEstimate420Risk(
-    const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
-    int rgb_stride, int rgb_bit_depth, int width, int height,
-    const SharpYuvOptions* options, float* score);
+WEBP_EXTERN int SharpYuvEstimate420Risk(const void* r_ptr, const void* g_ptr,
+                                        const void* b_ptr, int rgb_step,
+                                        int rgb_stride, int rgb_bit_depth,
+                                        int width, int height,
+                                        const SharpYuvOptions* options,
+                                        float* score);

 //------------------------------------------------------------------------------

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_EXTRAS_EXTRAS_H_
--- a/extras/get_disto.c
+++ b/extras/get_disto.c
@@ -48,7 +48,7 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
  reader = WebPGuessImageReader(data, data_size);
  ok = reader(data, data_size, pic, keep_alpha, NULL);

- End:
+End:
  if (!ok) {
    WFPRINTF(stderr, "Error! Could not process file %s\n",
             (const W_CHAR*)filename);
@@ -57,8 +57,8 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
  return ok ? data_size : 0;
 }

-static void RescalePlane(uint8_t* plane, int width, int height,
-                         int x_stride, int y_stride, int max) {
+static void RescalePlane(uint8_t* plane, int width, int height, int x_stride,
+                         int y_stride, int max) {
  const uint32_t factor = (max > 0) ? (255u << 16) / max : 0;
  int x, y;
  for (y = 0; y < height; ++y) {
@@ -71,9 +71,9 @@ static void RescalePlane(uint8_t* plane, int width, int height,
 }

 // Return the max absolute difference.
-static int DiffScaleChannel(uint8_t* src1, int stride1,
-                            const uint8_t* src2, int stride2,
-                            int x_stride, int w, int h, int do_scaling) {
+static int DiffScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
+                            int stride2, int x_stride, int w, int h,
+                            int do_scaling) {
  int x, y;
  int max = 0;
  for (y = 0; y < h; ++y) {
@@ -95,7 +95,7 @@ static int DiffScaleChannel(uint8_t* src1, int stride1,
 // breaking the library's hidden visibility. This code duplication avoids the
 // bigger annoyance of having to open up internal details of libdsp...

-#define SSIM_KERNEL 3   // total size of the kernel: 2 * SSIM_KERNEL + 1
+#define SSIM_KERNEL 3  // total size of the kernel: 2 * SSIM_KERNEL + 1

 // struct for accumulating statistical moments
 typedef struct {
@@ -105,19 +105,19 @@ typedef struct {
 } DistoStats;

 // hat-shaped filter. Sum of coefficients is equal to 16.
-static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = { 1, 2, 3, 4, 3, 2, 1 };
+static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = {1, 2, 3, 4, 3, 2, 1};

 static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
  const uint32_t N = stats->w;
-  const uint32_t w2 =  N * N;
+  const uint32_t w2 = N * N;
  const uint32_t C1 = 20 * w2;
  const uint32_t C2 = 60 * w2;
-  const uint32_t C3 = 8 * 8 * w2;   // 'dark' limit ~= 6
+  const uint32_t C3 = 8 * 8 * w2;  // 'dark' limit ~= 6
  const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
  const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
  if (xmxm + ymym >= C3) {
    const int64_t xmym = (int64_t)stats->xm * stats->ym;
-    const int64_t sxy = (int64_t)stats->xym * N - xmym;    // can be negative
+    const int64_t sxy = (int64_t)stats->xym * N - xmym;  // can be negative
    const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
    const uint64_t syy = (uint64_t)stats->yym * N - ymym;
    // we descale by 8 to prevent overflow during the fnum/fden multiply.
@@ -129,13 +129,13 @@ static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
    assert(r >= 0. && r <= 1.0);
    return r;
  }
-  return 1.;   // area is too dark to contribute meaningfully
+  return 1.;  // area is too dark to contribute meaningfully
 }

 static double SSIMGetClipped(const uint8_t* src1, int stride1,
-                             const uint8_t* src2, int stride2,
-                             int xo, int yo, int W, int H) {
-  DistoStats stats = { 0, 0, 0, 0, 0, 0 };
+                             const uint8_t* src2, int stride2, int xo, int yo,
+                             int W, int H) {
+  DistoStats stats = {0, 0, 0, 0, 0, 0};
  const int ymin = (yo - SSIM_KERNEL < 0) ? 0 : yo - SSIM_KERNEL;
  const int ymax = (yo + SSIM_KERNEL > H - 1) ? H - 1 : yo + SSIM_KERNEL;
  const int xmin = (xo - SSIM_KERNEL < 0) ? 0 : xo - SSIM_KERNEL;
@@ -145,13 +145,13 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
  src2 += ymin * stride2;
  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
    for (x = xmin; x <= xmax; ++x) {
-      const uint32_t w = kWeight[SSIM_KERNEL + x - xo]
-                       * kWeight[SSIM_KERNEL + y - yo];
+      const uint32_t w =
+          kWeight[SSIM_KERNEL + x - xo] * kWeight[SSIM_KERNEL + y - yo];
      const uint32_t s1 = src1[x];
      const uint32_t s2 = src2[x];
-      stats.w   += w;
-      stats.xm  += w * s1;
-      stats.ym  += w * s2;
+      stats.w += w;
+      stats.xm += w * s1;
+      stats.ym += w * s2;
      stats.xxm += w * s1 * s1;
      stats.xym += w * s1 * s2;
      stats.yym += w * s2 * s2;
@@ -161,9 +161,9 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
 }

 // Compute SSIM-score map. Return -1 in case of error, max diff otherwise.
-static int SSIMScaleChannel(uint8_t* src1, int stride1,
-                            const uint8_t* src2, int stride2,
-                            int x_stride, int w, int h, int do_scaling) {
+static int SSIMScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
+                            int stride2, int x_stride, int w, int h,
+                            int do_scaling) {
  int x, y;
  int max = 0;
  uint8_t* const plane1 = (uint8_t*)malloc(2 * w * h * sizeof(*plane1));
@@ -205,8 +205,8 @@ static void ConvertToGray(WebPPicture* const pic) {
    for (x = 0; x < pic->width; ++x) {
      const uint32_t argb = row[x];
      const uint32_t r = (argb >> 16) & 0xff;
-      const uint32_t g = (argb >>  8) & 0xff;
-      const uint32_t b = (argb >>  0) & 0xff;
+      const uint32_t g = (argb >> 8) & 0xff;
+      const uint32_t b = (argb >> 0) & 0xff;
      // We use BT.709 for converting to luminance.
      const uint32_t Y = (uint32_t)(0.2126 * r + 0.7152 * g + 0.0722 * b + .5);
      row[x] = (argb & 0xff000000u) | (Y * 0x010101u);
@@ -297,8 +297,7 @@ int main(int argc, const char* argv[]) {
    fprintf(stderr, "Error while computing the distortion.\n");
    goto End;
  }
-  printf("%u %.2f    %.2f %.2f %.2f %.2f [ %.2f bpp ]\n",
-         (unsigned int)size1,
+  printf("%u %.2f    %.2f %.2f %.2f %.2f [ %.2f bpp ]\n", (unsigned int)size1,
         disto[4], disto[0], disto[1], disto[2], disto[3],
         8.f * size1 / pic1.width / pic1.height);

@@ -306,21 +305,25 @@ int main(int argc, const char* argv[]) {
    uint8_t* data = NULL;
    size_t data_size = 0;
    if (pic1.use_argb != pic2.use_argb) {
-      fprintf(stderr, "Pictures are not in the same argb format. "
-                      "Can't save the difference map.\n");
+      fprintf(stderr,
+              "Pictures are not in the same argb format. "
+              "Can't save the difference map.\n");
      goto End;
    }
    if (pic1.use_argb) {
      int n;
      fprintf(stderr, "max differences per channel: ");
-      for (n = 0; n < 3; ++n) {    // skip the alpha channel
-        const int range = (type == 1) ?
-          SSIMScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
-                           (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
-                           4, pic1.width, pic1.height, scale) :
-          DiffScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
-                           (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
-                           4, pic1.width, pic1.height, scale);
+      for (n = 0; n < 3; ++n) {  // skip the alpha channel
+        const int range =
+            (type == 1)
+                ? SSIMScaleChannel(
+                      (uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
+                      (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
+                      pic1.width, pic1.height, scale)
+                : DiffScaleChannel(
+                      (uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
+                      (const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
+                      pic1.width, pic1.height, scale);
        if (range < 0) fprintf(stderr, "\nError computing diff map\n");
        fprintf(stderr, "[%d]", range);
      }
@@ -331,10 +334,9 @@ int main(int argc, const char* argv[]) {
      goto End;
    }
 #if !defined(WEBP_REDUCE_CSP)
-    data_size = WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb,
-                                       pic1.width, pic1.height,
-                                       pic1.argb_stride * 4,
-                                       &data);
+    data_size =
+        WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb, pic1.width,
+                               pic1.height, pic1.argb_stride * 4, &data);
    if (data_size == 0) {
      fprintf(stderr, "Error during lossless encoding.\n");
      goto End;
@@ -346,14 +348,15 @@ int main(int argc, const char* argv[]) {
 #else
    (void)data;
    (void)data_size;
-    fprintf(stderr, "Cannot save the difference map. Please recompile "
-                    "without the WEBP_REDUCE_CSP flag.\n");
+    fprintf(stderr,
+            "Cannot save the difference map. Please recompile "
+            "without the WEBP_REDUCE_CSP flag.\n");
    goto End;
 #endif  // WEBP_REDUCE_CSP
  }
  ret = EXIT_SUCCESS;

- End:
+End:
  WebPPictureFree(&pic1);
  WebPPictureFree(&pic2);
  FREE_WARGV_AND_RETURN(ret);
--- a/extras/quality_estimate.c
+++ b/extras/quality_estimate.c
@@ -54,10 +54,10 @@ int VP8EstimateQuality(const uint8_t* const data, size_t size) {
  if (data == NULL) return -1;

  if (WebPGetFeatures(data, size, &features) != VP8_STATUS_OK) {
-    return -1;   // invalid file
+    return -1;  // invalid file
  }
-  if (features.format == 2) return 101;  // lossless
-  if (features.format == 0 || features.has_animation) return -1;   // mixed
+  if (features.format == 2) return 101;                           // lossless
+  if (features.format == 0 || features.has_animation) return -1;  // mixed

  while (pos < size) {
    sig = (sig >> 8) | ((uint64_t)data[pos++] << 40);
@@ -78,29 +78,29 @@ int VP8EstimateQuality(const uint8_t* const data, size_t size) {
  GET_BIT(2);  // colorspace + clamp type

  // Segment header
-  if (GET_BIT(1)) {       // use_segment
+  if (GET_BIT(1)) {  // use_segment
    int s;
    const int update_map = GET_BIT(1);
-    if (GET_BIT(1)) {     // update data
+    if (GET_BIT(1)) {  // update data
      const int absolute_delta = GET_BIT(1);
-      int q[4]  = { 0, 0, 0, 0 };
+      int q[4] = {0, 0, 0, 0};
      for (s = 0; s < 4; ++s) {
        if (GET_BIT(1)) {
          q[s] = GET_BIT(7);
-          if (GET_BIT(1)) q[s] = -q[s];   // sign
+          if (GET_BIT(1)) q[s] = -q[s];  // sign
        }
      }
      if (absolute_delta) Q = q[0];  // just use the first segment's quantizer
-      for (s = 0; s < 4; ++s) CONDITIONAL_SKIP(7);   //  filter strength
+      for (s = 0; s < 4; ++s) CONDITIONAL_SKIP(7);  //  filter strength
    }
    if (update_map) {
      for (s = 0; s < 3; ++s) CONDITIONAL_SKIP(8);
    }
  }
  // Filter header
-  GET_BIT(1 + 6 + 3);     // simple + level + sharpness
-  if (GET_BIT(1)) {       // use_lf_delta
-    if (GET_BIT(1)) {     // update lf_delta?
+  GET_BIT(1 + 6 + 3);  // simple + level + sharpness
+  if (GET_BIT(1)) {    // use_lf_delta
+    if (GET_BIT(1)) {  // update lf_delta?
      int n;
      for (n = 0; n < 4 + 4; ++n) CONDITIONAL_SKIP(6);
    }
--- a/extras/vwebp_sdl.c
+++ b/extras/vwebp_sdl.c
@@ -23,10 +23,10 @@

 #if defined(WEBP_HAVE_SDL)

-#include "webp_to_sdl.h"
-#include "webp/decode.h"
-#include "imageio/imageio_util.h"
 #include "../examples/unicode.h"
+#include "imageio/imageio_util.h"
+#include "webp/decode.h"
+#include "webp_to_sdl.h"

 #if defined(WEBP_HAVE_JUST_SDL_H)
 #include <SDL.h>
@@ -41,11 +41,15 @@ static void ProcessEvents(void) {
    switch (event.type) {
      case SDL_KEYUP:
        switch (event.key.keysym.sym) {
-          case SDLK_q: done = 1; break;
-          default: break;
+          case SDLK_q:
+            done = 1;
+            break;
+          default:
+            break;
        }
        break;
-      default: break;
+      default:
+        break;
    }
  }
 }
@@ -93,7 +97,7 @@ int main(int argc, char* argv[]) {
  }
  ok = 1;

- Error:
+Error:
  SDL_Quit();
  FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
--- a/extras/webp_quality.c
+++ b/extras/webp_quality.c
@@ -12,9 +12,9 @@
 #include <string.h>

 #include "../examples/unicode.h"
-#include "src/webp/types.h"
 #include "extras/extras.h"
 #include "imageio/imageio_util.h"
+#include "src/webp/types.h"

 // Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
 int main(int argc, const char* argv[]) {
@@ -46,7 +46,7 @@ int main(int argc, const char* argv[]) {
        if (!quiet) {
          printf("Estimated quality factor: %d\n", q);
        } else {
-          printf("%d\n", q);   // just print the number
+          printf("%d\n", q);  // just print the number
        }
      }
      free((void*)data);
--- a/extras/webp_to_sdl.c
+++ b/extras/webp_to_sdl.c
@@ -17,11 +17,10 @@

 #if defined(WEBP_HAVE_SDL)

-#include "webp_to_sdl.h"
-
 #include <stdio.h>

 #include "src/webp/decode.h"
+#include "webp_to_sdl.h"

 #if defined(WEBP_HAVE_JUST_SDL_H)
 #include <SDL.h>
@@ -67,11 +66,11 @@ int WebPToSDL(const char* data, unsigned int data_size) {
  }

 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
-  output = WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width,
-                          &height);
+  output =
+      WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width, &height);
 #else
-  output = WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width,
-                          &height);
+  output =
+      WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width, &height);
 #endif
  if (output == NULL) {
    fprintf(stderr, "Error decoding image (%d)\n", status);
@@ -84,7 +83,7 @@ int WebPToSDL(const char* data, unsigned int data_size) {
  SDL_RenderPresent(renderer);
  ok = 1;

- Error:
+Error:
  // We should call SDL_DestroyWindow(window) but that makes .js fail.
  SDL_DestroyRenderer(renderer);
  SDL_DestroyTexture(texture);
--- a/imageio/image_dec.c
+++ b/imageio/image_dec.c
@@ -9,9 +9,10 @@
 //
 // Generic image-type guessing.

+#include "./image_dec.h"
+
 #include <stddef.h>

-#include "./image_dec.h"
 #include "./metadata.h"
 #include "webp/encode.h"
 #include "webp/types.h"
@@ -62,8 +63,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
 }

 static int FailReader(const uint8_t* const data, size_t data_size,
-                      struct WebPPicture* const pic,
-                      int keep_alpha, struct Metadata* const metadata) {
+                      struct WebPPicture* const pic, int keep_alpha,
+                      struct Metadata* const metadata) {
  (void)data;
  (void)data_size;
  (void)pic;
@@ -74,12 +75,18 @@ static int FailReader(const uint8_t* const data, size_t data_size,

 WebPImageReader WebPGetImageReader(WebPInputFileFormat format) {
  switch (format) {
-    case WEBP_PNG_FORMAT: return ReadPNG;
-    case WEBP_JPEG_FORMAT: return ReadJPEG;
-    case WEBP_TIFF_FORMAT: return ReadTIFF;
-    case WEBP_WEBP_FORMAT: return ReadWebP;
-    case WEBP_PNM_FORMAT: return ReadPNM;
-    default: return FailReader;
+    case WEBP_PNG_FORMAT:
+      return ReadPNG;
+    case WEBP_JPEG_FORMAT:
+      return ReadJPEG;
+    case WEBP_TIFF_FORMAT:
+      return ReadTIFF;
+    case WEBP_WEBP_FORMAT:
+      return ReadWebP;
+    case WEBP_PNM_FORMAT:
+      return ReadPNM;
+    default:
+      return FailReader;
  }
 }

--- a/imageio/image_dec.h
+++ b/imageio/image_dec.h
@@ -22,8 +22,8 @@
 #include "webp/config.h"
 #endif

-#include "./metadata.h"
 #include "./jpegdec.h"
+#include "./metadata.h"
 #include "./pngdec.h"
 #include "./pnmdec.h"
 #include "./tiffdec.h"
@@ -53,8 +53,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,

 // Signature for common image-reading functions (ReadPNG, ReadJPEG, ...)
 typedef int (*WebPImageReader)(const uint8_t* const data, size_t data_size,
-                               struct WebPPicture* const pic,
-                               int keep_alpha, struct Metadata* const metadata);
+                               struct WebPPicture* const pic, int keep_alpha,
+                               struct Metadata* const metadata);

 // Return the reader associated to a given file format.
 WebPImageReader WebPGetImageReader(WebPInputFileFormat format);
@@ -66,7 +66,7 @@ WebPImageReader WebPGuessImageReader(const uint8_t* const data,
                                     size_t data_size);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_IMAGE_DEC_H_
--- a/imageio/image_enc.c
+++ b/imageio/image_enc.c
@@ -17,7 +17,7 @@

 #ifdef WEBP_HAVE_PNG
 #include <png.h>
-#include <setjmp.h>   // note: this must be included *after* png.h
+#include <setjmp.h>  // note: this must be included *after* png.h
 #endif

 #ifdef HAVE_WINCODEC_H
@@ -26,13 +26,14 @@
 #endif
 #define CINTERFACE
 #define COBJMACROS
-#define _WIN32_IE 0x500  // Workaround bug in shlwapi.h when compiling C++
-                         // code with COBJMACROS.
+#define _WIN32_IE \
+  0x500            // Workaround bug in shlwapi.h when compiling C++
+                   // code with COBJMACROS.
 #include <ole2.h>  // CreateStreamOnHGlobal()
 #include <shlwapi.h>
 #include <tchar.h>
-#include <windows.h>
 #include <wincodec.h>
+#include <windows.h>
 #endif

 #include "../examples/unicode.h"
@@ -45,12 +46,12 @@

 #ifdef HAVE_WINCODEC_H

-#define IFS(fn)                                                     \
-  do {                                                              \
-    if (SUCCEEDED(hr)) {                                            \
-      hr = (fn);                                                    \
-      if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr);   \
-    }                                                               \
+#define IFS(fn)                                                   \
+  do {                                                            \
+    if (SUCCEEDED(hr)) {                                          \
+      hr = (fn);                                                  \
+      if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
+    }                                                             \
  } while (0)

 #ifdef __cplusplus
@@ -59,8 +60,8 @@
 #define MAKE_REFGUID(x) &(x)
 #endif

-static HRESULT CreateOutputStream(const char* out_file_name,
-                                  int write_to_mem, IStream** stream) {
+static HRESULT CreateOutputStream(const char* out_file_name, int write_to_mem,
+                                  IStream** stream) {
  HRESULT hr = S_OK;
  if (write_to_mem) {
    // Output to a memory buffer. This is freed when 'stream' is released.
@@ -77,24 +78,22 @@ static HRESULT CreateOutputStream(const char* out_file_name,
 }

 static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
-                             REFGUID container_guid,
-                             uint8_t* rgb, int stride,
+                             REFGUID container_guid, uint8_t* rgb, int stride,
                             uint32_t width, uint32_t height, int has_alpha) {
  HRESULT hr = S_OK;
  IWICImagingFactory* factory = NULL;
  IWICBitmapFrameEncode* frame = NULL;
  IWICBitmapEncoder* encoder = NULL;
  IStream* stream = NULL;
-  WICPixelFormatGUID pixel_format = has_alpha ? GUID_WICPixelFormat32bppBGRA
-                                              : GUID_WICPixelFormat24bppBGR;
+  WICPixelFormatGUID pixel_format =
+      has_alpha ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR;

  if (out_file_name == NULL || rgb == NULL) return E_INVALIDARG;

  IFS(CoInitialize(NULL));
-  IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
-                       CLSCTX_INPROC_SERVER,
-                       MAKE_REFGUID(IID_IWICImagingFactory),
-                       (LPVOID*)&factory));
+  IFS(CoCreateInstance(
+      MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
+      MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
  if (hr == REGDB_E_CLASSNOTREG) {
    fprintf(stderr,
            "Couldn't access Windows Imaging Component (are you running "
@@ -104,14 +103,13 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
  IFS(CreateOutputStream(out_file_name, use_stdout, &stream));
  IFS(IWICImagingFactory_CreateEncoder(factory, container_guid, NULL,
                                       &encoder));
-  IFS(IWICBitmapEncoder_Initialize(encoder, stream,
-                                   WICBitmapEncoderNoCache));
+  IFS(IWICBitmapEncoder_Initialize(encoder, stream, WICBitmapEncoderNoCache));
  IFS(IWICBitmapEncoder_CreateNewFrame(encoder, &frame, NULL));
  IFS(IWICBitmapFrameEncode_Initialize(frame, NULL));
  IFS(IWICBitmapFrameEncode_SetSize(frame, width, height));
  IFS(IWICBitmapFrameEncode_SetPixelFormat(frame, &pixel_format));
-  IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride,
-                                        height * stride, rgb));
+  IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride, height * stride,
+                                        rgb));
  IFS(IWICBitmapFrameEncode_Commit(frame));
  IFS(IWICBitmapEncoder_Commit(encoder));

@@ -153,11 +151,11 @@ int WebPWritePNG(const char* out_file_name, int use_stdout,
  const int has_alpha = WebPIsAlphaMode(buffer->colorspace);

  return SUCCEEDED(WriteUsingWIC(out_file_name, use_stdout,
-                                 MAKE_REFGUID(GUID_ContainerFormatPng),
-                                 rgb, stride, width, height, has_alpha));
+                                 MAKE_REFGUID(GUID_ContainerFormatPng), rgb,
+                                 stride, width, height, has_alpha));
 }

-#elif defined(WEBP_HAVE_PNG)    // !HAVE_WINCODEC_H
+#elif defined(WEBP_HAVE_PNG)  // !HAVE_WINCODEC_H
 static void PNGAPI PNGErrorFunction(png_structp png, png_const_charp unused) {
  (void)unused;  // remove variable-unused warning
  longjmp(png_jmpbuf(png), 1);
@@ -169,8 +167,8 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {

  if (out_file == NULL || buffer == NULL) return 0;

-  png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
-                                NULL, PNGErrorFunction, NULL);
+  png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, PNGErrorFunction,
+                                NULL);
  if (png == NULL) {
    return 0;
  }
@@ -206,11 +204,12 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
  png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
  return 1;
 }
-#else    // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
+#else                         // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
 int WebPWritePNG(FILE* fout, const WebPDecBuffer* const buffer) {
  if (fout == NULL || buffer == NULL) return 0;

-  fprintf(stderr, "PNG support not compiled. Please install the libpng "
+  fprintf(stderr,
+          "PNG support not compiled. Please install the libpng "
          "development package before building.\n");
  fprintf(stderr, "You can run with -ppm flag to decode in PPM format.\n");
  return 0;
@@ -235,8 +234,10 @@ static int WritePPMPAM(FILE* fout, const WebPDecBuffer* const buffer,
    if (row == NULL) return 0;

    if (alpha) {
-      fprintf(fout, "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
-                    "TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
+      fprintf(fout,
+              "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
+              "TUPLTYPE RGB_ALPHA\nENDHDR\n",
+              width, height);
    } else {
      fprintf(fout, "P6\n%u %u\n255\n", width, height);
    }
@@ -297,7 +298,7 @@ static void PutLE16(uint8_t* const dst, uint32_t value) {
 }

 static void PutLE32(uint8_t* const dst, uint32_t value) {
-  PutLE16(dst + 0, (value >>  0) & 0xffff);
+  PutLE16(dst + 0, (value >> 0) & 0xffff);
  PutLE16(dst + 2, (value >> 16) & 0xffff);
 }

@@ -310,7 +311,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  int stride;
  uint32_t y;
  uint32_t bytes_per_px, line_size, image_size, bmp_stride, total_size;
-  uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = { 0 };
+  uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = {0};

  if (fout == NULL || buffer == NULL) return 0;

@@ -329,27 +330,27 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  if (rgba == NULL) return 0;

  // bitmap file header
-  PutLE16(bmp_header + 0, 0x4d42);                // signature 'BM'
-  PutLE32(bmp_header + 2, total_size);            // size including header
-  PutLE32(bmp_header + 6, 0);                     // reserved
-  PutLE32(bmp_header + 10, header_size);          // offset to pixel array
+  PutLE16(bmp_header + 0, 0x4d42);        // signature 'BM'
+  PutLE32(bmp_header + 2, total_size);    // size including header
+  PutLE32(bmp_header + 6, 0);             // reserved
+  PutLE32(bmp_header + 10, header_size);  // offset to pixel array
  // bitmap info header
-  PutLE32(bmp_header + 14, header_size - 14);     // DIB header size
-  PutLE32(bmp_header + 18, width);                // dimensions
-  PutLE32(bmp_header + 22, height);               // no vertical flip
-  PutLE16(bmp_header + 26, 1);                    // number of planes
-  PutLE16(bmp_header + 28, bytes_per_px * 8);     // bits per pixel
-  PutLE32(bmp_header + 30, has_alpha ? 3 : 0);    // BI_BITFIELDS or BI_RGB
+  PutLE32(bmp_header + 14, header_size - 14);   // DIB header size
+  PutLE32(bmp_header + 18, width);              // dimensions
+  PutLE32(bmp_header + 22, height);             // no vertical flip
+  PutLE16(bmp_header + 26, 1);                  // number of planes
+  PutLE16(bmp_header + 28, bytes_per_px * 8);   // bits per pixel
+  PutLE32(bmp_header + 30, has_alpha ? 3 : 0);  // BI_BITFIELDS or BI_RGB
  PutLE32(bmp_header + 34, image_size);
-  PutLE32(bmp_header + 38, 2400);                 // x pixels/meter
-  PutLE32(bmp_header + 42, 2400);                 // y pixels/meter
-  PutLE32(bmp_header + 46, 0);                    // number of palette colors
-  PutLE32(bmp_header + 50, 0);                    // important color count
-  if (has_alpha) {  // BITMAPV3INFOHEADER complement
-    PutLE32(bmp_header + 54, 0x00ff0000);         // red mask
-    PutLE32(bmp_header + 58, 0x0000ff00);         // green mask
-    PutLE32(bmp_header + 62, 0x000000ff);         // blue mask
-    PutLE32(bmp_header + 66, 0xff000000);         // alpha mask
+  PutLE32(bmp_header + 38, 2400);          // x pixels/meter
+  PutLE32(bmp_header + 42, 2400);          // y pixels/meter
+  PutLE32(bmp_header + 46, 0);             // number of palette colors
+  PutLE32(bmp_header + 50, 0);             // important color count
+  if (has_alpha) {                         // BITMAPV3INFOHEADER complement
+    PutLE32(bmp_header + 54, 0x00ff0000);  // red mask
+    PutLE32(bmp_header + 58, 0x0000ff00);  // green mask
+    PutLE32(bmp_header + 62, 0x000000ff);  // blue mask
+    PutLE32(bmp_header + 66, 0xff000000);  // alpha mask
  }

  // TODO(skal): color profile
@@ -367,7 +368,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
    }
    // write padding zeroes
    if (bmp_stride != line_size) {
-      const uint8_t zeroes[3] = { 0 };
+      const uint8_t zeroes[3] = {0};
      if (fwrite(zeroes, bmp_stride - line_size, 1, fout) != 1) {
        return 0;
      }
@@ -397,35 +398,35 @@ int WebPWriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
  // For non-alpha case, we omit tag 0x152 (ExtraSamples).
  const uint8_t num_ifd_entries = 0;
  uint8_t tiff_header[TIFF_HEADER_SIZE] = {
-    0x49, 0x49, 0x2a, 0x00,   // little endian signature
-    8, 0, 0, 0,               // offset to the unique IFD that follows
-    // IFD (offset = 8). Entries must be written in increasing tag order.
-    num_ifd_entries, 0,       // Number of entries in the IFD (12 bytes each).
-    0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,    //  10: Width  (TBD)
-    0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,    //  22: Height (TBD)
-    0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0,     //  34: BitsPerSample: 8888
-        EXTRA_DATA_OFFSET + 0, 0, 0, 0,
-    0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    //  46: Compression: none
-    0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,    //  58: Photometric: RGB
-    0x11, 0x01, 4, 0, 1, 0, 0, 0,                //  70: Strips offset:
-        TIFF_HEADER_SIZE, 0, 0, 0,               //      data follows header
-    0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    //  82: Orientation: topleft
-    0x15, 0x01, 3, 0, 1, 0, 0, 0,                //  94: SamplesPerPixels
-        bytes_per_px, 0, 0, 0,
-    0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,    // 106: Rows per strip (TBD)
-    0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0,    // 118: StripByteCount (TBD)
-    0x1a, 0x01, 5, 0, 1, 0, 0, 0,                // 130: X-resolution
-        EXTRA_DATA_OFFSET + 8, 0, 0, 0,
-    0x1b, 0x01, 5, 0, 1, 0, 0, 0,                // 142: Y-resolution
-        EXTRA_DATA_OFFSET + 8, 0, 0, 0,
-    0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    // 154: PlanarConfiguration
-    0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,    // 166: ResolutionUnit (inch)
-    0x52, 0x01, 3, 0, 1, 0, 0, 0,
-        assoc_alpha, 0, 0, 0,                    // 178: ExtraSamples: rgbA/RGBA
-    0, 0, 0, 0,                                  // 190: IFD terminator
-    // EXTRA_DATA_OFFSET:
-    8, 0, 8, 0, 8, 0, 8, 0,      // BitsPerSample
-    72, 0, 0, 0, 1, 0, 0, 0      // 72 pixels/inch, for X/Y-resolution
+      0x49, 0x49, 0x2a, 0x00,  // little endian signature
+      8, 0, 0, 0,              // offset to the unique IFD that follows
+      // IFD (offset = 8). Entries must be written in increasing tag order.
+      num_ifd_entries, 0,  // Number of entries in the IFD (12 bytes each).
+      0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,  //  10: Width  (TBD)
+      0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,  //  22: Height (TBD)
+      0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0,   //  34: BitsPerSample: 8888
+      EXTRA_DATA_OFFSET + 0, 0, 0, 0, 0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
+      0,                                         //  46: Compression: none
+      0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,  //  58: Photometric: RGB
+      0x11, 0x01, 4, 0, 1, 0, 0, 0,              //  70: Strips offset:
+      TIFF_HEADER_SIZE, 0, 0, 0,                 //      data follows header
+      0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,  //  82: Orientation: topleft
+      0x15, 0x01, 3, 0, 1, 0, 0, 0,              //  94: SamplesPerPixels
+      bytes_per_px, 0, 0, 0, 0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0,
+      0,                                         // 106: Rows per strip (TBD)
+      0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0,  // 118: StripByteCount (TBD)
+      0x1a, 0x01, 5, 0, 1, 0, 0, 0,              // 130: X-resolution
+      EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1b, 0x01, 5, 0, 1, 0, 0,
+      0,  // 142: Y-resolution
+      EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
+      0,                                         // 154: PlanarConfiguration
+      0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,  // 166: ResolutionUnit (inch)
+      0x52, 0x01, 3, 0, 1, 0, 0, 0, assoc_alpha, 0, 0,
+      0,           // 178: ExtraSamples: rgbA/RGBA
+      0, 0, 0, 0,  // 190: IFD terminator
+      // EXTRA_DATA_OFFSET:
+      8, 0, 8, 0, 8, 0, 8, 0,  // BitsPerSample
+      72, 0, 0, 0, 1, 0, 0, 0  // 72 pixels/inch, for X/Y-resolution
  };
  uint32_t y;

@@ -517,11 +518,11 @@ int WebPWritePGM(FILE* fout, const WebPDecBuffer* const buffer) {

    if (src_y == NULL || src_u == NULL || src_v == NULL) return 0;

-    fprintf(fout, "P5\n%d %d\n255\n",
-            (width + 1) & ~1, height + uv_height + a_height);
+    fprintf(fout, "P5\n%d %d\n255\n", (width + 1) & ~1,
+            height + uv_height + a_height);
    for (y = 0; ok && y < height; ++y) {
      ok &= (fwrite(src_y, width, 1, fout) == 1);
-      if (width & 1) fputc(0, fout);    // padding byte
+      if (width & 1) fputc(0, fout);  // padding byte
      src_y += yuv->y_stride;
    }
    for (y = 0; ok && y < uv_height; ++y) {
@@ -532,7 +533,7 @@ int WebPWritePGM(FILE* fout, const WebPDecBuffer* const buffer) {
    }
    for (y = 0; ok && y < a_height; ++y) {
      ok &= (fwrite(src_a, width, 1, fout) == 1);
-      if (width & 1) fputc(0, fout);    // padding byte
+      if (width & 1) fputc(0, fout);  // padding byte
      src_a += yuv->a_stride;
    }
    return ok;
@@ -609,8 +610,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
    }
  }

-  if (format == PNG ||
-      format == RGBA || format == BGRA || format == ARGB ||
+  if (format == PNG || format == RGBA || format == BGRA || format == ARGB ||
      format == rgbA || format == bgrA || format == Argb) {
 #ifdef HAVE_WINCODEC_H
    ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
--- a/imageio/image_enc.h
+++ b/imageio/image_enc.h
@@ -20,8 +20,8 @@
 #include "webp/config.h"
 #endif

-#include "webp/types.h"
 #include "webp/decode.h"
+#include "webp/types.h"

 #ifdef __cplusplus
 extern "C" {
@@ -38,10 +38,19 @@ typedef enum {
  RAW_YUV,
  ALPHA_PLANE_ONLY,  // this is for experimenting only
  // forced colorspace output (for testing, mostly)
-  RGB, RGBA, BGR, BGRA, ARGB,
-  RGBA_4444, RGB_565,
-  rgbA, bgrA, Argb, rgbA_4444,
-  YUV, YUVA
+  RGB,
+  RGBA,
+  BGR,
+  BGRA,
+  ARGB,
+  RGBA_4444,
+  RGB_565,
+  rgbA,
+  bgrA,
+  Argb,
+  rgbA_4444,
+  YUV,
+  YUVA
 } WebPOutputFileFormat;

 // General all-purpose call.
@@ -90,7 +99,7 @@ int WebPWriteYUV(FILE* fout, const struct WebPDecBuffer* const buffer);
 int WebPWrite16bAsPGM(FILE* fout, const struct WebPDecBuffer* const buffer);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_IMAGE_ENC_H_
--- a/imageio/imageio_util.c
+++ b/imageio/imageio_util.c
@@ -13,15 +13,15 @@
 #include "./imageio_util.h"

 #if defined(_WIN32)
-#include <fcntl.h>   // for _O_BINARY
-#include <io.h>      // for _setmode()
+#include <fcntl.h>  // for _O_BINARY
+#include <io.h>     // for _setmode()
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

-#include "webp/types.h"
 #include "../examples/unicode.h"
+#include "webp/types.h"

 // -----------------------------------------------------------------------------
 // File I/O
@@ -65,14 +65,14 @@ int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size) {
  *data_size = size;
  return 1;

- Error:
+Error:
  free(input);
  fprintf(stderr, "Could not read from stdin\n");
  return 0;
 }

-int ImgIoUtilReadFile(const char* const file_name,
-                      const uint8_t** data, size_t* data_size) {
+int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
+                      size_t* data_size) {
  int ok;
  uint8_t* file_data;
  size_t file_size;
@@ -123,8 +123,8 @@ int ImgIoUtilReadFile(const char* const file_name,

 // -----------------------------------------------------------------------------

-int ImgIoUtilWriteFile(const char* const file_name,
-                       const uint8_t* data, size_t data_size) {
+int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
+                       size_t data_size) {
  int ok;
  FILE* out;
  const int to_stdout = (file_name == NULL) || !WSTRCMP(file_name, "-");
@@ -145,8 +145,8 @@ int ImgIoUtilWriteFile(const char* const file_name,

 // -----------------------------------------------------------------------------

-void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
-                        uint8_t* dst, int dst_stride, int width, int height) {
+void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
+                        int dst_stride, int width, int height) {
  while (height-- > 0) {
    memcpy(dst, src, width * sizeof(*dst));
    src += src_stride;
--- a/imageio/imageio_util.h
+++ b/imageio/imageio_util.h
@@ -36,22 +36,22 @@ FILE* ImgIoUtilSetBinaryMode(FILE* file);
 // to be used as a C-string.
 // If 'file_name' is NULL or equal to "-", input is read from stdin by calling
 // the function ImgIoUtilReadFromStdin().
-int ImgIoUtilReadFile(const char* const file_name,
-                      const uint8_t** data, size_t* data_size);
+int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
+                      size_t* data_size);

 // Same as ImgIoUtilReadFile(), but reads until EOF from stdin instead.
 int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size);

 // Write a data segment into a file named 'file_name'. Returns true if ok.
 // If 'file_name' is NULL or equal to "-", output is written to stdout.
-int ImgIoUtilWriteFile(const char* const file_name,
-                       const uint8_t* data, size_t data_size);
+int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
+                       size_t data_size);

 //------------------------------------------------------------------------------

 // Copy width x height pixels from 'src' to 'dst' honoring the strides.
-void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
-                        uint8_t* dst, int dst_stride, int width, int height);
+void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
+                        int dst_stride, int width, int height);

 //------------------------------------------------------------------------------

@@ -59,7 +59,7 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
 int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t stride, size_t height);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_IMAGEIO_UTIL_H_
--- a/imageio/jpegdec.c
+++ b/imageio/jpegdec.c
@@ -18,8 +18,8 @@
 #include <stdio.h>

 #ifdef WEBP_HAVE_JPEG
-#include <jpeglib.h>
 #include <jerror.h>
+#include <jpeglib.h>
 #include <setjmp.h>
 #include <stdlib.h>
 #include <string.h>
@@ -33,10 +33,10 @@
 // Metadata processing

 #ifndef JPEG_APP1
-# define JPEG_APP1 (JPEG_APP0 + 1)
+#define JPEG_APP1 (JPEG_APP0 + 1)
 #endif
 #ifndef JPEG_APP2
-# define JPEG_APP2 (JPEG_APP0 + 2)
+#define JPEG_APP2 (JPEG_APP0 + 2)
 #endif

 typedef struct {
@@ -64,7 +64,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
  // ICC.1:2010-12 (4.3.0.0) Annex B.4 Embedding ICC Profiles in JPEG files
  static const char kICCPSignature[] = "ICC_PROFILE";
  static const size_t kICCPSignatureLength = 12;  // signature includes '\0'
-  static const size_t kICCPSkipLength = 14;  // signature + seq & count
+  static const size_t kICCPSkipLength = 14;       // signature + seq & count
  int expected_count = 0;
  int actual_count = 0;
  int seq_max = 0;
@@ -74,8 +74,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {

  memset(iccp_segments, 0, sizeof(iccp_segments));
  for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
-    if (marker->marker == JPEG_APP2 &&
-        marker->data_length > kICCPSkipLength &&
+    if (marker->marker == JPEG_APP2 && marker->data_length > kICCPSkipLength &&
        !memcmp(marker->data, kICCPSignature, kICCPSignatureLength)) {
      // ICC_PROFILE\0<seq><count>; 'seq' starts at 1.
      const int seq = marker->data[kICCPSignatureLength];
@@ -84,8 +83,9 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
      ICCPSegment* segment;

      if (segment_size == 0 || count == 0 || seq == 0) {
-        fprintf(stderr, "[ICCP] size (%d) / count (%d) / sequence number (%d)"
-                        " cannot be 0!\n",
+        fprintf(stderr,
+                "[ICCP] size (%d) / count (%d) / sequence number (%d)"
+                " cannot be 0!\n",
                (int)segment_size, seq, count);
        return 0;
      }
@@ -100,7 +100,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {

      segment = iccp_segments + seq - 1;
      if (segment->data_length != 0) {
-        fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n" , seq);
+        fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n", seq);
        return 0;
      }

@@ -138,8 +138,8 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
    int i;
    size_t offset = 0;
    for (i = 0; i < seq_max; ++i) {
-      memcpy(iccp->bytes + offset,
-             iccp_segments[i].data, iccp_segments[i].data_length);
+      memcpy(iccp->bytes + offset, iccp_segments[i].data,
+             iccp_segments[i].data_length);
      offset += iccp_segments[i].data_length;
    }
  }
@@ -156,12 +156,12 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
    size_t signature_length;
    size_t storage_offset;
  } kJPEGMetadataMap[] = {
-    // Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
-    { JPEG_APP1, "Exif\0",                        6, METADATA_OFFSET(exif) },
-    // XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
-    // TODO(jzern) Add support for 'ExtendedXMP'
-    { JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp) },
-    { 0, NULL, 0, 0 },
+      // Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
+      {JPEG_APP1, "Exif\0", 6, METADATA_OFFSET(exif)},
+      // XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
+      // TODO(jzern) Add support for 'ExtendedXMP'
+      {JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp)},
+      {0, NULL, 0, 0},
  };
  jpeg_saved_marker_ptr marker;
  // Treat ICC profiles separately as they may be segmented and out of order.
@@ -179,8 +179,8 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
                               kJPEGMetadataMap[i].storage_offset);

        if (payload->bytes == NULL) {
-          const char* marker_data = (const char*)marker->data +
-                                    kJPEGMetadataMap[i].signature_length;
+          const char* marker_data =
+              (const char*)marker->data + kJPEGMetadataMap[i].signature_length;
          const size_t marker_data_length =
              marker->data_length - kJPEGMetadataMap[i].signature_length;
          if (!MetadataCopy(marker_data, marker_data_length, payload)) return 0;
@@ -250,9 +250,7 @@ static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
  ctx->pub.next_input_byte += jump;
 }

-static void ContextTerm(j_decompress_ptr cinfo) {
-  (void)cinfo;
-}
+static void ContextTerm(j_decompress_ptr cinfo) { (void)cinfo; }

 static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
                         JPEGReadContext* const ctx) {
@@ -267,8 +265,7 @@ static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
 }

 int ReadJPEG(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic, int keep_alpha,
-             Metadata* const metadata) {
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
  volatile int ok = 0;
  int width, height;
  int64_t stride;
@@ -285,12 +282,12 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
  ctx.data = data;
  ctx.data_size = data_size;

-  memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo));   // for setjmp safety
+  memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo));  // for setjmp safety
  dinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = my_error_exit;

  if (setjmp(jerr.setjmp_buffer)) {
- Error:
+  Error:
    MetadataFree(metadata);
    jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
    goto End;
@@ -353,11 +350,11 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
    MetadataFree(metadata);  // In case the caller forgets to free it on error.
  }

- End:
+End:
  free(rgb);
  return ok;
 }
-#else  // !WEBP_HAVE_JPEG
+#else   // !WEBP_HAVE_JPEG
 int ReadJPEG(const uint8_t* const data, size_t data_size,
             struct WebPPicture* const pic, int keep_alpha,
             struct Metadata* const metadata) {
@@ -366,7 +363,8 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "JPEG support not compiled. Please install the libjpeg "
+  fprintf(stderr,
+          "JPEG support not compiled. Please install the libjpeg "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/jpegdec.h
+++ b/imageio/jpegdec.h
@@ -33,7 +33,7 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
             struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_JPEGDEC_H_
--- a/imageio/metadata.h
+++ b/imageio/metadata.h
@@ -43,7 +43,7 @@ int MetadataCopy(const char* metadata, size_t metadata_len,
                 MetadataPayload* const payload);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_METADATA_H_
--- a/imageio/pngdec.c
+++ b/imageio/pngdec.c
@@ -22,8 +22,7 @@
 #define PNG_USER_MEM_SUPPORTED  // for png_create_read_struct_2
 #endif
 #include <png.h>
-
-#include <setjmp.h>   // note: this must be included *after* png.h
+#include <setjmp.h>  // note: this must be included *after* png.h
 #include <stdlib.h>
 #include <string.h>

@@ -33,15 +32,14 @@
 #include "webp/types.h"

 #define LOCAL_PNG_VERSION ((PNG_LIBPNG_VER_MAJOR << 8) | PNG_LIBPNG_VER_MINOR)
-#define LOCAL_PNG_PREREQ(maj, min) \
-   (LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))
+#define LOCAL_PNG_PREREQ(maj, min) (LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))

 static void PNGAPI error_function(png_structp png, png_const_charp error) {
  if (error != NULL) fprintf(stderr, "libpng error: %s\n", error);
  longjmp(png_jmpbuf(png), 1);
 }

-#if LOCAL_PNG_PREREQ(1,4)
+#if LOCAL_PNG_PREREQ(1, 4)
 typedef png_alloc_size_t LocalPngAllocSize;
 #else
 typedef png_size_t LocalPngAllocSize;
@@ -113,7 +111,8 @@ static int ProcessRawProfile(const char* profile, size_t profile_len,
  }
  ++src;
  // skip the profile name and extract the length.
-  while (*src != '\0' && *src++ != '\n') {}
+  while (*src != '\0' && *src++ != '\n') {
+  }
  expected_length = (int)strtol(src, &end, 10);
  if (*end != '\n') {
    fprintf(stderr, "Malformed raw profile, expected '\\n' got '\\x%.2X'\n",
@@ -135,30 +134,29 @@ static const struct {
                 MetadataPayload* const payload);
  size_t storage_offset;
 } kPNGMetadataMap[] = {
-  // https://exiftool.org/TagNames/PNG.html#TextualData
-  // See also: ExifTool on CPAN.
-  { "Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif) },
-  { "Raw profile type xmp",  ProcessRawProfile, METADATA_OFFSET(xmp) },
-  // Exiftool puts exif data in APP1 chunk, too.
-  { "Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif) },
-  // ImageMagick uses lowercase app1.
-  { "Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif) },
-  // XMP Specification Part 3, Section 3 #PNG
-  { "XML:com.adobe.xmp",     MetadataCopy,      METADATA_OFFSET(xmp) },
-  { NULL, NULL, 0 },
+    // https://exiftool.org/TagNames/PNG.html#TextualData
+    // See also: ExifTool on CPAN.
+    {"Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif)},
+    {"Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp)},
+    // Exiftool puts exif data in APP1 chunk, too.
+    {"Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif)},
+    // ImageMagick uses lowercase app1.
+    {"Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif)},
+    // XMP Specification Part 3, Section 3 #PNG
+    {"XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp)},
+    {NULL, NULL, 0},
 };

 // Looks for metadata at both the beginning and end of the PNG file, giving
 // preference to the head.
 // Returns true on success. The caller must use MetadataFree() on 'metadata' in
 // all cases.
-static int ExtractMetadataFromPNG(png_structp png,
-                                  png_infop const head_info,
+static int ExtractMetadataFromPNG(png_structp png, png_infop const head_info,
                                  png_infop const end_info,
                                  Metadata* const metadata) {
  int p;

-  for (p = 0; p < 2; ++p)  {
+  for (p = 0; p < 2; ++p) {
    png_infop const info = (p == 0) ? head_info : end_info;
    png_textp text = NULL;
    const png_uint_32 num = png_get_text(png, info, &text, NULL);
@@ -215,15 +213,15 @@ static int ExtractMetadataFromPNG(png_structp png,
    {
      png_charp name;
      int comp_type;
-#if LOCAL_PNG_PREREQ(1,5)
+#if LOCAL_PNG_PREREQ(1, 5)
      png_bytep profile;
 #else
      png_charp profile;
 #endif
      png_uint_32 len;

-      if (png_get_iCCP(png, info,
-                       &name, &comp_type, &profile, &len) == PNG_INFO_iCCP) {
+      if (png_get_iCCP(png, info, &name, &comp_type, &profile, &len) ==
+          PNG_INFO_iCCP) {
        if (!MetadataCopy((const char*)profile, len, &metadata->iccp)) return 0;
      }
    }
@@ -248,12 +246,12 @@ static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {
 }

 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
-            int keep_alpha, struct Metadata* const metadata) {
+            struct WebPPicture* const pic, int keep_alpha,
+            struct Metadata* const metadata) {
  volatile png_structp png = NULL;
  volatile png_infop info = NULL;
  volatile png_infop end_info = NULL;
-  PNGReadContext context = { NULL, 0, 0 };
+  PNGReadContext context = {NULL, 0, 0};
  int color_type, bit_depth, interlaced;
  int num_channels;
  int num_passes;
@@ -268,19 +266,19 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
  context.data = data;
  context.data_size = data_size;

-  png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL,
-                                 NULL, MallocFunc, FreeFunc);
+  png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL, NULL,
+                                 MallocFunc, FreeFunc);
  if (png == NULL) goto End;

  png_set_error_fn(png, 0, error_function, NULL);
  if (setjmp(png_jmpbuf(png))) {
- Error:
+  Error:
    MetadataFree(metadata);
    goto End;
  }

-#if LOCAL_PNG_PREREQ(1,5) || \
-    (LOCAL_PNG_PREREQ(1,4) && PNG_LIBPNG_VER_RELEASE >= 1)
+#if LOCAL_PNG_PREREQ(1, 5) || \
+    (LOCAL_PNG_PREREQ(1, 4) && PNG_LIBPNG_VER_RELEASE >= 1)
  // If it looks like the bitstream is going to need more memory than libpng's
  // internal limit (default: 8M), try to (reasonably) raise it.
  if (data_size > png_get_chunk_malloc_max(png) && data_size < (1u << 24)) {
@@ -295,9 +293,9 @@ int ReadPNG(const uint8_t* const data, size_t data_size,

  png_set_read_fn(png, &context, ReadFunc);
  png_read_info(png, info);
-  if (!png_get_IHDR(png, info,
-                    &width, &height, &bit_depth, &color_type, &interlaced,
-                    NULL, NULL)) goto Error;
+  if (!png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type,
+                    &interlaced, NULL, NULL))
+    goto Error;

  png_set_strip_16(png);
  png_set_packing(png);
@@ -368,24 +366,25 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
    goto Error;
  }

- End:
+End:
  if (png != NULL) {
-    png_destroy_read_struct((png_structpp)&png,
-                            (png_infopp)&info, (png_infopp)&end_info);
+    png_destroy_read_struct((png_structpp)&png, (png_infopp)&info,
+                            (png_infopp)&end_info);
  }
  free(rgb);
  return ok;
 }
-#else  // !WEBP_HAVE_PNG
+#else   // !WEBP_HAVE_PNG
 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
-            int keep_alpha, struct Metadata* const metadata) {
+            struct WebPPicture* const pic, int keep_alpha,
+            struct Metadata* const metadata) {
  (void)data;
  (void)data_size;
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "PNG support not compiled. Please install the libpng "
+  fprintf(stderr,
+          "PNG support not compiled. Please install the libpng "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/pngdec.h
+++ b/imageio/pngdec.h
@@ -29,11 +29,11 @@ struct WebPPicture;
 // or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
 // Returns true on success.
 int ReadPNG(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic,
-            int keep_alpha, struct Metadata* const metadata);
+            struct WebPPicture* const pic, int keep_alpha,
+            struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_PNGDEC_H_
--- a/imageio/pnmdec.c
+++ b/imageio/pnmdec.c
@@ -26,11 +26,11 @@
 #endif

 typedef enum {
-  WIDTH_FLAG      = 1 << 0,
-  HEIGHT_FLAG     = 1 << 1,
-  DEPTH_FLAG      = 1 << 2,
-  MAXVAL_FLAG     = 1 << 3,
-  TUPLE_FLAG      = 1 << 4,
+  WIDTH_FLAG = 1 << 0,
+  HEIGHT_FLAG = 1 << 1,
+  DEPTH_FLAG = 1 << 2,
+  MAXVAL_FLAG = 1 << 3,
+  TUPLE_FLAG = 1 << 4,
  ALL_NEEDED_FLAGS = WIDTH_FLAG | HEIGHT_FLAG | DEPTH_FLAG | MAXVAL_FLAG
 } PNMFlags;

@@ -39,9 +39,9 @@ typedef struct {
  size_t data_size;
  int width, height;
  int bytes_per_px;
-  int depth;          // 1 (grayscale), 2 (grayscale + alpha), 3 (rgb), 4 (rgba)
+  int depth;  // 1 (grayscale), 2 (grayscale + alpha), 3 (rgb), 4 (rgba)
  int max_value;
-  int type;           // 5, 6 or 7
+  int type;  // 5, 6 or 7
  int seen_flags;
 } PNMInfo;

@@ -55,7 +55,7 @@ static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
                       char out[MAX_LINE_SIZE + 1], size_t* const out_size) {
  size_t i = 0;
  *out_size = 0;
- redo:
+redo:
  for (i = 0; i < MAX_LINE_SIZE && off < data_size; ++i) {
    out[i] = data[off++];
    if (out[i] == '\n') break;
@@ -64,7 +64,7 @@ static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
    if (i == 0) goto redo;         // empty line
    if (out[0] == '#') goto redo;  // skip comment
  }
-  out[i] = 0;   // safety sentinel
+  out[i] = 0;  // safety sentinel
  *out_size = i;
  return off;
 }
@@ -173,9 +173,8 @@ static size_t ReadHeader(PNMInfo* const info) {
    info->depth = (info->type == 5) ? 1 : 3;
  }
  // perform some basic numerical validation
-  if (info->width <= 0 || info->height <= 0 ||
-      info->type <= 0 || info->type >= 9 ||
-      info->depth <= 0 || info->depth > 4 ||
+  if (info->width <= 0 || info->height <= 0 || info->type <= 0 ||
+      info->type >= 9 || info->depth <= 0 || info->depth > 4 ||
      info->max_value <= 0 || info->max_value >= 65536) {
    return 0;
  }
@@ -183,13 +182,12 @@ static size_t ReadHeader(PNMInfo* const info) {
  return off;
 }

-int ReadPNM(const uint8_t* const data, size_t data_size,
-            WebPPicture* const pic, int keep_alpha,
-            struct Metadata* const metadata) {
+int ReadPNM(const uint8_t* const data, size_t data_size, WebPPicture* const pic,
+            int keep_alpha, struct Metadata* const metadata) {
  int ok = 0;
  int i, j;
  uint64_t stride, pixel_bytes, sample_size, depth;
-  uint8_t* rgb = NULL, *tmp_rgb;
+  uint8_t *rgb = NULL, *tmp_rgb;
  size_t offset;
  PNMInfo info;

@@ -209,8 +207,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
  // Some basic validations.
  if (pic == NULL) goto End;
  if (info.width > WEBP_MAX_DIMENSION || info.height > WEBP_MAX_DIMENSION) {
-    fprintf(stderr, "Invalid %dx%d dimension for PNM\n",
-                    info.width, info.height);
+    fprintf(stderr, "Invalid %dx%d dimension for PNM\n", info.width,
+            info.height);
    goto End;
  }

@@ -258,8 +256,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
      const uint32_t round = info.max_value / 2;
      int k = 0;
      for (i = 0; i < info.width * info.depth; ++i) {
-        uint32_t v = (sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1]
-                   : in[i];
+        uint32_t v =
+            (sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1] : in[i];
        if (info.max_value != 255) v = (v * 255u + round) / info.max_value;
        if (v > 255u) v = 255u;
        if (info.depth > 2) {
@@ -291,7 +289,7 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
  if (!ok) goto End;

  ok = 1;
- End:
+End:
  free((void*)rgb);

  (void)metadata;
--- a/imageio/pnmdec.h
+++ b/imageio/pnmdec.h
@@ -33,7 +33,7 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
            struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_PNMDEC_H_
--- a/imageio/tiffdec.c
+++ b/imageio/tiffdec.c
@@ -31,9 +31,9 @@ static const struct {
  ttag_t tag;
  size_t storage_offset;
 } kTIFFMetadataMap[] = {
-  { TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp) },
-  { TIFFTAG_XMLPACKET,  METADATA_OFFSET(xmp) },
-  { 0, 0 },
+    {TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp)},
+    {TIFFTAG_XMLPACKET, METADATA_OFFSET(xmp)},
+    {0, 0},
 };

 // Returns true on success. The caller must use MetadataFree() on 'metadata' in
@@ -86,9 +86,9 @@ static toff_t MySize(thandle_t opaque) {

 static toff_t MySeek(thandle_t opaque, toff_t offset, int whence) {
  MyData* const my_data = (MyData*)opaque;
-  offset += (whence == SEEK_CUR) ? my_data->pos
-          : (whence == SEEK_SET) ? 0
-          : my_data->size;
+  offset += (whence == SEEK_CUR)   ? my_data->pos
+            : (whence == SEEK_SET) ? 0
+                                   : my_data->size;
  if (offset > my_data->size) return (toff_t)-1;
  my_data->pos = offset;
  return offset;
@@ -120,7 +120,7 @@ static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {

 // Unmultiply Argb data. Taken from dsp/alpha_processing
 // (we don't want to force a dependency to a libdspdec library).
-#define MFIX 24    // 24bit fixed-point arithmetic
+#define MFIX 24  // 24bit fixed-point arithmetic
 #define HALF ((1u << MFIX) >> 1)

 static uint32_t Unmult(uint8_t x, uint32_t mult) {
@@ -128,9 +128,7 @@ static uint32_t Unmult(uint8_t x, uint32_t mult) {
  return (v > 255u) ? 255u : v;
 }

-static WEBP_INLINE uint32_t GetScale(uint32_t a) {
-  return (255u << MFIX) / a;
-}
+static WEBP_INLINE uint32_t GetScale(uint32_t a) { return (255u << MFIX) / a; }

 #undef MFIX
 #undef HALF
@@ -140,7 +138,7 @@ static void MultARGBRow(uint8_t* ptr, int width) {
  for (x = 0; x < width; ++x, ptr += 4) {
    const uint32_t alpha = ptr[3];
    if (alpha < 255) {
-      if (alpha == 0) {   // alpha == 0
+      if (alpha == 0) {  // alpha == 0
        ptr[0] = ptr[1] = ptr[2] = 0;
      } else {
        const uint32_t scale = GetScale(alpha);
@@ -153,9 +151,8 @@ static void MultARGBRow(uint8_t* ptr, int width) {
 }

 int ReadTIFF(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic, int keep_alpha,
-             Metadata* const metadata) {
-  MyData my_data = { data, (toff_t)data_size, 0 };
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+  MyData my_data = {data, (toff_t)data_size, 0};
  TIFF* tif;
  uint32_t image_width, image_height, tile_width, tile_height;
  uint64_t stride;
@@ -171,8 +168,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
    return 0;
  }

-  tif = TIFFClientOpen("Memory", "r", &my_data,
-                       MyRead, MyRead, MySeek, MyClose,
+  tif = TIFFClientOpen("Memory", "r", &my_data, MyRead, MyRead, MySeek, MyClose,
                       MySize, MyMapFile, MyUnmapFile);
  if (tif == NULL) {
    fprintf(stderr, "Error! Cannot parse TIFF file\n");
@@ -181,9 +177,10 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,

  dircount = TIFFNumberOfDirectories(tif);
  if (dircount > 1) {
-    fprintf(stderr, "Warning: multi-directory TIFF files are not supported.\n"
-                    "Only the first will be used, %d will be ignored.\n",
-                    dircount - 1);
+    fprintf(stderr,
+            "Warning: multi-directory TIFF files are not supported.\n"
+            "Only the first will be used, %d will be ignored.\n",
+            dircount - 1);
  }
  if (!TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &samples_per_px)) {
    fprintf(stderr, "Error! Cannot retrieve TIFF samples-per-pixel info.\n");
@@ -253,9 +250,10 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
          tmp += stride;
        }
      }
-      ok = keep_alpha
-         ? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
-         : WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
+      ok =
+          keep_alpha
+              ? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
+              : WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
    }
    _TIFFfree(raster);
  } else {
@@ -272,11 +270,11 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
      }
    }
  }
- End:
+End:
  TIFFClose(tif);
  return ok;
 }
-#else  // !WEBP_HAVE_TIFF
+#else   // !WEBP_HAVE_TIFF
 int ReadTIFF(const uint8_t* const data, size_t data_size,
             struct WebPPicture* const pic, int keep_alpha,
             struct Metadata* const metadata) {
@@ -285,7 +283,8 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "TIFF support not compiled. Please install the libtiff "
+  fprintf(stderr,
+          "TIFF support not compiled. Please install the libtiff "
          "development package before building.\n");
  return 0;
 }
--- a/imageio/tiffdec.h
+++ b/imageio/tiffdec.h
@@ -33,7 +33,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
             struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_TIFFDEC_H_
--- a/imageio/webpdec.c
+++ b/imageio/webpdec.c
@@ -31,9 +31,14 @@
 // WebP decoding

 static const char* const kStatusMessages[VP8_STATUS_NOT_ENOUGH_DATA + 1] = {
-  "OK", "OUT_OF_MEMORY", "INVALID_PARAM", "BITSTREAM_ERROR",
-  "UNSUPPORTED_FEATURE", "SUSPENDED", "USER_ABORT", "NOT_ENOUGH_DATA"
-};
+    "OK",
+    "OUT_OF_MEMORY",
+    "INVALID_PARAM",
+    "BITSTREAM_ERROR",
+    "UNSUPPORTED_FEATURE",
+    "SUSPENDED",
+    "USER_ABORT",
+    "NOT_ENOUGH_DATA"};

 static void PrintAnimationWarning(const WebPDecoderConfig* const config) {
  if (config->input.has_animation) {
@@ -53,8 +58,7 @@ void PrintWebPError(const char* const in_file, int status) {
  fprintf(stderr, "\n");
 }

-int LoadWebP(const char* const in_file,
-             const uint8_t** data, size_t* data_size,
+int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
             WebPBitstreamFeatures* bitstream) {
  VP8StatusCode status;
  WebPBitstreamFeatures local_features;
@@ -84,9 +88,8 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
  return WebPDecode(data, data_size, config);
 }

-VP8StatusCode DecodeWebPIncremental(
-    const uint8_t* const data, size_t data_size,
-    WebPDecoderConfig* const config) {
+VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
+                                    WebPDecoderConfig* const config) {
  VP8StatusCode status = VP8_STATUS_OK;
  if (config == NULL) return VP8_STATUS_INVALID_PARAM;

@@ -111,7 +114,7 @@ VP8StatusCode DecodeWebPIncremental(

 static int ExtractMetadata(const uint8_t* const data, size_t data_size,
                           Metadata* const metadata) {
-  WebPData webp_data = { data, data_size };
+  WebPData webp_data = {data, data_size};
  WebPDemuxer* const demux = WebPDemux(&webp_data);
  WebPChunkIterator chunk_iter;
  uint32_t flags;
@@ -143,8 +146,7 @@ static int ExtractMetadata(const uint8_t* const data, size_t data_size,
 // -----------------------------------------------------------------------------

 int ReadWebP(const uint8_t* const data, size_t data_size,
-             WebPPicture* const pic,
-             int keep_alpha, Metadata* const metadata) {
+             WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
  int ok = 0;
  VP8StatusCode status = VP8_STATUS_OK;
  WebPDecoderConfig config;
@@ -223,7 +225,7 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
        argb += pic->argb_stride;
      }
    }
-  } while (0);   // <- so we can 'break' out of the loop
+  } while (0);  // <- so we can 'break' out of the loop

  if (status != VP8_STATUS_OK) {
    PrintWebPError("input data", status);
--- a/imageio/webpdec.h
+++ b/imageio/webpdec.h
@@ -35,8 +35,7 @@ void PrintWebPError(const char* const in_file, int status);
 // Reads a WebP from 'in_file', returning the contents and size in 'data' and
 // 'data_size'. If not NULL, 'bitstream' is populated using WebPGetFeatures().
 // Returns true on success.
-int LoadWebP(const char* const in_file,
-             const uint8_t** data, size_t* data_size,
+int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
             WebPBitstreamFeatures* bitstream);

 // Decodes the WebP contained in 'data'.
@@ -48,9 +47,8 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
                         WebPDecoderConfig* const config);

 // Same as DecodeWebP(), but using the incremental decoder.
-VP8StatusCode DecodeWebPIncremental(
-    const uint8_t* const data, size_t data_size,
-    WebPDecoderConfig* const config);
+VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
+                                    WebPDecoderConfig* const config);

 //------------------------------------------------------------------------------

@@ -60,11 +58,11 @@ VP8StatusCode DecodeWebPIncremental(
 // or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
 // Returns true on success.
 int ReadWebP(const uint8_t* const data, size_t data_size,
-             struct WebPPicture* const pic,
-             int keep_alpha, struct Metadata* const metadata);
+             struct WebPPicture* const pic, int keep_alpha,
+             struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_WEBPDEC_H_
--- a/imageio/wicdec.c
+++ b/imageio/wicdec.c
@@ -25,31 +25,31 @@
 #endif
 #define CINTERFACE
 #define COBJMACROS
-#define _WIN32_IE 0x500  // Workaround bug in shlwapi.h when compiling C++
-                         // code with COBJMACROS.
+#define _WIN32_IE \
+  0x500            // Workaround bug in shlwapi.h when compiling C++
+                   // code with COBJMACROS.
 #include <ole2.h>  // CreateStreamOnHGlobal()
 #include <shlwapi.h>
 #include <tchar.h>
-#include <windows.h>
 #include <wincodec.h>
+#include <windows.h>

 #include "../examples/unicode.h"
 #include "./imageio_util.h"
 #include "./metadata.h"
 #include "webp/encode.h"

-#define IFS(fn)                                                     \
-  do {                                                              \
-    if (SUCCEEDED(hr)) {                                            \
-      hr = (fn);                                                    \
-      if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr);   \
-    }                                                               \
+#define IFS(fn)                                                   \
+  do {                                                            \
+    if (SUCCEEDED(hr)) {                                          \
+      hr = (fn);                                                  \
+      if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
+    }                                                             \
  } while (0)

 // modified version of DEFINE_GUID from guiddef.h.
 #define WEBP_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
-  static const GUID name = \
-      { l, w1, w2, { b1, b2,  b3,  b4,  b5,  b6,  b7,  b8 } }
+  static const GUID name = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}

 #ifdef __cplusplus
 #define MAKE_REFGUID(x) (x)
@@ -66,23 +66,17 @@ typedef struct WICFormatImporter {
 // From Microsoft SDK 7.0a -- wincodec.h
 // Create local copies for compatibility when building against earlier
 // versions of the SDK.
-WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_,
-                 0x6fddc324, 0x4e03, 0x4bfe,
-                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_,
-                 0x6fddc324, 0x4e03, 0x4bfe,
-                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
-                 0x6fddc324, 0x4e03, 0x4bfe,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
+                 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
+WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
+                 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
+WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_, 0x6fddc324, 0x4e03, 0x4bfe,
                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0f);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
-                 0xf5c7ad2d, 0x6a8d, 0x43dd,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_, 0xf5c7ad2d, 0x6a8d, 0x43dd,
                 0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
-                 0x1562ff7c, 0xd352, 0x46f9,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_, 0x1562ff7c, 0xd352, 0x46f9,
                 0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
-WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
-                 0x6fddc324, 0x4e03, 0x4bfe,
+WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_, 0x6fddc324, 0x4e03, 0x4bfe,
                 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);

 static HRESULT OpenInputStream(const char* filename, IStream** stream) {
@@ -147,8 +141,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,

  if (SUCCEEDED(hr)) {
    UINT num_color_contexts;
-    IFS(IWICBitmapFrameDecode_GetColorContexts(frame,
-                                               count, color_contexts,
+    IFS(IWICBitmapFrameDecode_GetColorContexts(frame, count, color_contexts,
                                               &num_color_contexts));
    assert(FAILED(hr) || num_color_contexts <= count);
    for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
@@ -156,8 +149,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
      IFS(IWICColorContext_GetType(color_contexts[i], &type));
      if (SUCCEEDED(hr) && type == WICColorContextProfile) {
        UINT size;
-        IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
-                                             0, NULL, &size));
+        IFS(IWICColorContext_GetProfileBytes(color_contexts[i], 0, NULL,
+                                             &size));
        if (SUCCEEDED(hr) && size > 0) {
          iccp->bytes = (uint8_t*)malloc(size);
          if (iccp->bytes == NULL) {
@@ -165,9 +158,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
            break;
          }
          iccp->size = size;
-          IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
-                                               (UINT)iccp->size, iccp->bytes,
-                                               &size));
+          IFS(IWICColorContext_GetProfileBytes(
+              color_contexts[i], (UINT)iccp->size, iccp->bytes, &size));
          if (SUCCEEDED(hr) && size != iccp->size) {
            fprintf(stderr, "Warning! ICC profile size (%u) != expected (%u)\n",
                    size, (uint32_t)iccp->size);
@@ -209,8 +201,7 @@ static int HasPalette(GUID pixel_format) {

 static int HasAlpha(IWICImagingFactory* const factory,
                    IWICBitmapDecoder* const decoder,
-                    IWICBitmapFrameDecode* const frame,
-                    GUID pixel_format) {
+                    IWICBitmapFrameDecode* const frame, GUID pixel_format) {
  int has_alpha;
  if (HasPalette(pixel_format)) {
    IWICPalette* frame_palette = NULL;
@@ -245,21 +236,20 @@ static int HasAlpha(IWICImagingFactory* const factory,
  return has_alpha;
 }

-int ReadPictureWithWIC(const char* const filename,
-                       WebPPicture* const pic, int keep_alpha,
-                       Metadata* const metadata) {
+int ReadPictureWithWIC(const char* const filename, WebPPicture* const pic,
+                       int keep_alpha, Metadata* const metadata) {
  // From Microsoft SDK 6.0a -- ks.h
  // Define a local copy to avoid link errors under mingw.
  WEBP_DEFINE_GUID(GUID_NULL_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  static const WICFormatImporter kAlphaFormatImporters[] = {
-    { &GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA },
-    { &GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA },
-    { NULL, 0, NULL },
+      {&GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA},
+      {&GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA},
+      {NULL, 0, NULL},
  };
  static const WICFormatImporter kNonAlphaFormatImporters[] = {
-    { &GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR },
-    { &GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB },
-    { NULL, 0, NULL },
+      {&GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR},
+      {&GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB},
+      {NULL, 0, NULL},
  };
  HRESULT hr = S_OK;
  IWICBitmapFrameDecode* frame = NULL;
@@ -274,26 +264,20 @@ int ReadPictureWithWIC(const char* const filename,
  const WICFormatImporter* importer = NULL;
  GUID src_container_format = GUID_NULL_;
  // From Windows Kits\10\Include\10.0.19041.0\um\wincodec.h
-  WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_,
-                   0xe094b0e2, 0x67f2, 0x45b3,
-                   0xb0, 0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
+  WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_, 0xe094b0e2, 0x67f2, 0x45b3, 0xb0,
+                   0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
  static const GUID* kAlphaContainers[] = {
-    &GUID_ContainerFormatBmp,
-    &GUID_ContainerFormatPng,
-    &GUID_ContainerFormatTiff,
-    &GUID_ContainerFormatWebp_,
-    NULL
-  };
+      &GUID_ContainerFormatBmp, &GUID_ContainerFormatPng,
+      &GUID_ContainerFormatTiff, &GUID_ContainerFormatWebp_, NULL};
  int has_alpha = 0;
  int64_t stride;

  if (filename == NULL || pic == NULL) return 0;

  IFS(CoInitialize(NULL));
-  IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
-                       CLSCTX_INPROC_SERVER,
-                       MAKE_REFGUID(IID_IWICImagingFactory),
-                       (LPVOID*)&factory));
+  IFS(CoCreateInstance(
+      MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
+      MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
  if (hr == REGDB_E_CLASSNOTREG) {
    fprintf(stderr,
            "Couldn't access Windows Imaging Component (are you running "
@@ -303,8 +287,7 @@ int ReadPictureWithWIC(const char* const filename,
  // Prepare for image decoding.
  IFS(OpenInputStream(filename, &stream));
  IFS(IWICImagingFactory_CreateDecoderFromStream(
-          factory, stream, NULL,
-          WICDecodeMetadataCacheOnDemand, &decoder));
+      factory, stream, NULL, WICDecodeMetadataCacheOnDemand, &decoder));
  IFS(IWICBitmapDecoder_GetFrameCount(decoder, &frame_count));
  if (SUCCEEDED(hr)) {
    if (frame_count == 0) {
@@ -338,18 +321,15 @@ int ReadPictureWithWIC(const char* const filename,
       hr == S_OK && importer->import != NULL; ++importer) {
    BOOL can_convert;
    const HRESULT cchr = IWICFormatConverter_CanConvert(
-        converter,
-        MAKE_REFGUID(src_pixel_format),
-        MAKE_REFGUID(*importer->pixel_format),
-        &can_convert);
+        converter, MAKE_REFGUID(src_pixel_format),
+        MAKE_REFGUID(*importer->pixel_format), &can_convert);
    if (SUCCEEDED(cchr) && can_convert) break;
  }
  if (importer->import == NULL) hr = E_FAIL;

-  IFS(IWICFormatConverter_Initialize(converter, (IWICBitmapSource*)frame,
-                                     importer->pixel_format,
-                                     WICBitmapDitherTypeNone,
-                                     NULL, 0.0, WICBitmapPaletteTypeCustom));
+  IFS(IWICFormatConverter_Initialize(
+      converter, (IWICBitmapSource*)frame, importer->pixel_format,
+      WICBitmapDitherTypeNone, NULL, 0.0, WICBitmapPaletteTypeCustom));

  // Decode.
  IFS(IWICFormatConverter_GetSize(converter, &width, &height));
@@ -361,18 +341,17 @@ int ReadPictureWithWIC(const char* const filename,

  if (SUCCEEDED(hr)) {
    rgb = (BYTE*)malloc((size_t)stride * height);
-    if (rgb == NULL)
-      hr = E_OUTOFMEMORY;
+    if (rgb == NULL) hr = E_OUTOFMEMORY;
  }
-  IFS(IWICFormatConverter_CopyPixels(converter, NULL,
-                                     (UINT)stride, (UINT)stride * height, rgb));
+  IFS(IWICFormatConverter_CopyPixels(converter, NULL, (UINT)stride,
+                                     (UINT)stride * height, rgb));

  // WebP conversion.
  if (SUCCEEDED(hr)) {
    int ok;
    pic->width = width;
    pic->height = height;
-    pic->use_argb = 1;    // For WIC, we always force to argb
+    pic->use_argb = 1;  // For WIC, we always force to argb
    ok = importer->import(pic, rgb, (int)stride);
    if (!ok) hr = E_FAIL;
  }
@@ -394,7 +373,7 @@ int ReadPictureWithWIC(const char* const filename,
  free(rgb);
  return SUCCEEDED(hr);
 }
-#else  // !HAVE_WINCODEC_H
+#else   // !HAVE_WINCODEC_H
 int ReadPictureWithWIC(const char* const filename,
                       struct WebPPicture* const pic, int keep_alpha,
                       struct Metadata* const metadata) {
@@ -402,10 +381,11 @@ int ReadPictureWithWIC(const char* const filename,
  (void)pic;
  (void)keep_alpha;
  (void)metadata;
-  fprintf(stderr, "Windows Imaging Component (WIC) support not compiled. "
-                  "Visual Studio and mingw-w64 builds support WIC. Make sure "
-                  "wincodec.h detection is working correctly if using autoconf "
-                  "and HAVE_WINCODEC_H is defined before building.\n");
+  fprintf(stderr,
+          "Windows Imaging Component (WIC) support not compiled. "
+          "Visual Studio and mingw-w64 builds support WIC. Make sure "
+          "wincodec.h detection is working correctly if using autoconf "
+          "and HAVE_WINCODEC_H is defined before building.\n");
  return 0;
 }
 #endif  // HAVE_WINCODEC_H
--- a/imageio/wicdec.h
+++ b/imageio/wicdec.h
@@ -28,7 +28,7 @@ int ReadPictureWithWIC(const char* const filename,
                       struct Metadata* const metadata);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_IMAGEIO_WICDEC_H_
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -26,9 +26,7 @@

 //------------------------------------------------------------------------------

-int SharpYuvGetVersion(void) {
-  return SHARPYUV_VERSION;
-}
+int SharpYuvGetVersion(void) { return SHARPYUV_VERSION; }

 //------------------------------------------------------------------------------
 // Sharp RGB->YUV conversion
@@ -49,8 +47,8 @@ static int GetPrecisionShift(int rgb_bit_depth) {
                                               : (kMaxBitDepth - rgb_bit_depth);
 }

-typedef int16_t fixed_t;      // signed type with extra precision for UV
-typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
+typedef int16_t fixed_t;     // signed type with extra precision for UV
+typedef uint16_t fixed_y_t;  // unsigned type with extra precision for W

 //------------------------------------------------------------------------------

@@ -121,7 +119,7 @@ static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
    dst[0 * uv_w] = (fixed_t)(r - W);
    dst[1 * uv_w] = (fixed_t)(g - W);
    dst[2 * uv_w] = (fixed_t)(b - W);
-    dst  += 1;
+    dst += 1;
    src1 += 2;
    src2 += 2;
  } while (++i < uv_w);
@@ -148,12 +146,9 @@ static WEBP_INLINE int Shift(int v, int shift) {
  return (shift >= 0) ? (v << shift) : (v >> -shift);
 }

-static void ImportOneRow(const uint8_t* const r_ptr,
-                         const uint8_t* const g_ptr,
-                         const uint8_t* const b_ptr,
-                         int rgb_step,
-                         int rgb_bit_depth,
-                         int pic_width,
+static void ImportOneRow(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr, int rgb_step,
+                         int rgb_bit_depth, int pic_width,
                         fixed_y_t* const dst) {
  // Convert the rgb_step from a number of bytes to a number of uint8_t or
  // uint16_t values depending the bit depth.
@@ -181,18 +176,14 @@ static void ImportOneRow(const uint8_t* const r_ptr,
 }

 static void InterpolateTwoRows(const fixed_y_t* const best_y,
-                               const fixed_t* prev_uv,
-                               const fixed_t* cur_uv,
-                               const fixed_t* next_uv,
-                               int w,
-                               fixed_y_t* out1,
-                               fixed_y_t* out2,
-                               int rgb_bit_depth) {
+                               const fixed_t* prev_uv, const fixed_t* cur_uv,
+                               const fixed_t* next_uv, int w, fixed_y_t* out1,
+                               fixed_y_t* out2, int rgb_bit_depth) {
  const int uv_w = w >> 1;
-  const int len = (w - 1) >> 1;   // length to filter
+  const int len = (w - 1) >> 1;  // length to filter
  int k = 3;
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  while (k-- > 0) {   // process each R/G/B segments in turn
+  while (k-- > 0) {  // process each R/G/B segments in turn
    // special boundary case for i==0
    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
@@ -212,7 +203,7 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
    out1 += w;
    out2 += w;
    prev_uv += uv_w;
-    cur_uv  += uv_w;
+    cur_uv += uv_w;
    next_uv += uv_w;
  }
 }
@@ -220,16 +211,16 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
 static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
                                         const int coeffs[4], int sfix) {
  const int srounder = 1 << (YUV_FIX + sfix - 1);
-  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
-                   coeffs[3] + srounder;
+  const int luma =
+      coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + coeffs[3] + srounder;
  return (luma >> (YUV_FIX + sfix));
 }

 static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
                            int u_stride, uint8_t* v_ptr, int v_stride,
-                            int rgb_bit_depth,
-                            int yuv_bit_depth, int width, int height,
+                            int rgb_bit_depth, int yuv_bit_depth, int width,
+                            int height,
                            const SharpYuvConversionMatrix* yuv_matrix) {
  int i, j;
  const fixed_t* const best_uv_base = best_uv;
@@ -319,7 +310,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,

  // TODO(skal): allocate one big memory chunk. But for now, it's easier
  // for valgrind debugging to have several chunks.
-  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
+  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);  // scratch
  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
@@ -335,9 +326,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
  assert(w > 0);
  assert(h > 0);

-  if (best_y_base == NULL || best_uv_base == NULL ||
-      target_y_base == NULL || target_uv_base == NULL ||
-      best_rgb_y == NULL || best_rgb_uv == NULL ||
+  if (best_y_base == NULL || best_uv_base == NULL || target_y_base == NULL ||
+      target_uv_base == NULL || best_rgb_y == NULL || best_rgb_uv == NULL ||
      tmp_buffer == NULL) {
    ok = 0;
    goto End;
@@ -350,8 +340,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
    fixed_y_t* const src2 = tmp_buffer + 3 * w;

    // prepare two rows of input
-    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
-                 src1);
+    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, src1);
    if (!is_last_row) {
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
                   rgb_step, rgb_bit_depth, width, src2);
@@ -390,8 +379,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
      {
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
-        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
-                           src1, src2, rgb_bit_depth);
+        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2,
+                           rgb_bit_depth);
        prev_uv = cur_uv;
        cur_uv = next_uv;
      }
@@ -424,7 +413,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
                        width, height, yuv_matrix);

- End:
+End:
  free(best_y_base);
  free(best_uv_base);
  free(target_y_base);
@@ -440,16 +429,18 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
 #if defined(WEBP_USE_THREAD) && !defined(_WIN32)
 #include <pthread.h>  // NOLINT

-#define LOCK_ACCESS \
-    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
-    if (pthread_mutex_lock(&sharpyuv_lock)) return
-#define UNLOCK_ACCESS_AND_RETURN                  \
-    do {                                          \
-      (void)pthread_mutex_unlock(&sharpyuv_lock); \
-      return;                                     \
-    } while (0)
+#define LOCK_ACCESS                                                 \
+  static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
+  if (pthread_mutex_lock(&sharpyuv_lock)) return
+#define UNLOCK_ACCESS_AND_RETURN                \
+  do {                                          \
+    (void)pthread_mutex_unlock(&sharpyuv_lock); \
+    return;                                     \
+  } while (0)
 #else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define LOCK_ACCESS do {} while (0)
+#define LOCK_ACCESS \
+  do {              \
+  } while (0)
 #define UNLOCK_ACCESS_AND_RETURN return
 #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)

--- a/sharpyuv/sharpyuv_csp.c
+++ b/sharpyuv/sharpyuv_csp.c
@@ -67,33 +67,33 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
 static const SharpYuvConversionMatrix kWebpMatrix = {
-  {16839, 33059, 6420, 16 << 16},
-  {-9719, -19081, 28800, 128 << 16},
-  {28800, -24116, -4684, 128 << 16},
+    {16839, 33059, 6420, 16 << 16},
+    {-9719, -19081, 28800, 128 << 16},
+    {28800, -24116, -4684, 128 << 16},
 };
 // Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
 static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
-  {16829, 33039, 6416, 16 << 16},
-  {-9714, -19071, 28784, 128 << 16},
-  {28784, -24103, -4681, 128 << 16},
+    {16829, 33039, 6416, 16 << 16},
+    {-9714, -19071, 28784, 128 << 16},
+    {28784, -24103, -4681, 128 << 16},
 };
 // Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
 static const SharpYuvConversionMatrix kRec601FullMatrix = {
-  {19595, 38470, 7471, 0},
-  {-11058, -21710, 32768, 128 << 16},
-  {32768, -27439, -5329, 128 << 16},
+    {19595, 38470, 7471, 0},
+    {-11058, -21710, 32768, 128 << 16},
+    {32768, -27439, -5329, 128 << 16},
 };
 // Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
 static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
-  {11966, 40254, 4064, 16 << 16},
-  {-6596, -22189, 28784, 128 << 16},
-  {28784, -26145, -2639, 128 << 16},
+    {11966, 40254, 4064, 16 << 16},
+    {-6596, -22189, 28784, 128 << 16},
+    {28784, -26145, -2639, 128 << 16},
 };
 // Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
 static const SharpYuvConversionMatrix kRec709FullMatrix = {
-  {13933, 46871, 4732, 0},
-  {-7509, -25259, 32768, 128 << 16},
-  {32768, -29763, -3005, 128 << 16},
+    {13933, 46871, 4732, 0},
+    {-7509, -25259, 32768, 128 << 16},
+    {32768, -29763, -3005, 128 << 16},
 };

 const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
--- a/sharpyuv/sharpyuv_csp.h
+++ b/sharpyuv/sharpyuv_csp.h
@@ -20,8 +20,8 @@ extern "C" {

 // Range of YUV values.
 typedef enum {
-  kSharpYuvRangeFull,     // YUV values between [0;255] (for 8 bit)
-  kSharpYuvRangeLimited   // Y in [16;235], YUV in [16;240] (for 8 bit)
+  kSharpYuvRangeFull,    // YUV values between [0;255] (for 8 bit)
+  kSharpYuvRangeLimited  // Y in [16;235], YUV in [16;240] (for 8 bit)
 } SharpYuvRange;

 // Constants that define a YUV color space.
--- a/sharpyuv/sharpyuv_gamma.c
+++ b/sharpyuv/sharpyuv_gamma.c
@@ -67,8 +67,7 @@ void SharpYuvInitGammaTables(void) {
        } else {
          value = (1. + a) * pow(g, 1. / kGammaF) - a;
        }
-        kLinearToGammaTabS[v] =
-            (uint32_t)(final_scale * value + 0.5);
+        kLinearToGammaTabS[v] = (uint32_t)(final_scale * value + 0.5);
      }
      // to prevent small rounding errors to cause read-overflow:
      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
@@ -198,7 +197,7 @@ static float ToLinearLog100(float gamma) {
  // The function is non-bijective so choose the middle of [0, 0.01].
  const float mid_interval = 0.01f / 2.f;
  return (gamma <= 0.0f) ? mid_interval
-                          : Powf(10.0f, 2.f * (MIN(gamma, 1.f) - 1.0f));
+                         : Powf(10.0f, 2.f * (MIN(gamma, 1.f) - 1.0f));
 }

 static float FromLinearLog100(float linear) {
@@ -209,12 +208,12 @@ static float ToLinearLog100Sqrt10(float gamma) {
  // The function is non-bijective so choose the middle of [0, 0.00316227766f[.
  const float mid_interval = 0.00316227766f / 2.f;
  return (gamma <= 0.0f) ? mid_interval
-                          : Powf(10.0f, 2.5f * (MIN(gamma, 1.f) - 1.0f));
+                         : Powf(10.0f, 2.5f * (MIN(gamma, 1.f) - 1.0f));
 }

 static float FromLinearLog100Sqrt10(float linear) {
  return (linear < 0.00316227766f) ? 0.0f
-                                  : 1.0f + Log10f(MIN(linear, 1.f)) / 2.5f;
+                                   : 1.0f + Log10f(MIN(linear, 1.f)) / 2.5f;
 }

 static float ToLinearIec61966(float gamma) {
--- a/sharpyuv/sharpyuv_neon.c
+++ b/sharpyuv/sharpyuv_neon.c
@@ -14,9 +14,9 @@
 #include "sharpyuv/sharpyuv_dsp.h"

 #if defined(WEBP_USE_NEON)
+#include <arm_neon.h>
 #include <assert.h>
 #include <stdlib.h>
-#include <arm_neon.h>

 static uint16_t clip_NEON(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
@@ -35,11 +35,11 @@ static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
-    const int16x8_t D = vsubq_s16(A, B);       // diff_y
-    const int16x8_t F = vaddq_s16(C, D);       // new_y
+    const int16x8_t D = vsubq_s16(A, B);  // diff_y
+    const int16x8_t F = vaddq_s16(C, D);  // new_y
    const uint16x8_t H =
        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
-    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
+    const int16x8_t I = vabsq_s16(D);  // abs(diff_y)
    vst1q_u16(dst + i, H);
    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
  }
@@ -60,8 +60,8 @@ static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
    const int16x8_t A = vld1q_s16(ref + i);
    const int16x8_t B = vld1q_s16(src + i);
    const int16x8_t C = vld1q_s16(dst + i);
-    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
-    const int16x8_t E = vaddq_s16(C, D);   // new_uv
+    const int16x8_t D = vsubq_s16(A, B);  // diff_uv
+    const int16x8_t E = vaddq_s16(C, D);  // new_uv
    vst1q_s16(dst + i, E);
  }
  for (; i < len; ++i) {
--- a/sharpyuv/sharpyuv_sse2.c
+++ b/sharpyuv/sharpyuv_sse2.c
@@ -15,7 +15,6 @@

 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
-
 #include <stdlib.h>

 #include "src/dsp/cpu.h"
@@ -45,7 +44,7 @@ static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
    const __m128i F = _mm_add_epi16(C, D);       // new_y
    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
-    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
+    const __m128i I = _mm_madd_epi16(D, G);  // sum(abs(...))
    _mm_storeu_si128((__m128i*)(dst + i), H);
    sum = _mm_add_epi32(sum, I);
  }
@@ -67,8 +66,8 @@ static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
-    const __m128i E = _mm_add_epi16(C, D);   // new_uv
+    const __m128i D = _mm_sub_epi16(A, B);  // diff_uv
+    const __m128i E = _mm_add_epi16(C, D);  // new_uv
    _mm_storeu_si128((__m128i*)(dst + i), E);
  }
  for (; i < len; ++i) {
@@ -94,8 +93,8 @@ static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
    const __m128i a1b0 = _mm_add_epi16(a1, b0);
    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
-    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
-    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
+    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);  // 2*(A0+B1)
+    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);  // 2*(A1+B0)
    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
    const __m128i d0 = _mm_add_epi16(c1, a0);
--- a/src/dec/alpha_dec.c
+++ b/src/dec/alpha_dec.c
@@ -79,8 +79,7 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
  if (dec->method < ALPHA_NO_COMPRESSION ||
      dec->method > ALPHA_LOSSLESS_COMPRESSION ||
      dec->filter >= WEBP_FILTER_LAST ||
-      dec->pre_processing > ALPHA_PREPROCESSED_LEVELS ||
-      rsrv != 0) {
+      dec->pre_processing > ALPHA_PREPROCESSED_LEVELS || rsrv != 0) {
    return 0;
  }

@@ -189,7 +188,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
  }

  if (!dec->is_alpha_decoded) {
-    if (dec->alph_dec == NULL) {    // Initialize decoder.
+    if (dec->alph_dec == NULL) {  // Initialize decoder.
      dec->alph_dec = ALPHNew();
      if (dec->alph_dec == NULL) {
        VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
@@ -197,20 +196,20 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
        return NULL;
      }
      if (!AllocateAlphaPlane(dec, io)) goto Error;
-      if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size,
-                    io, dec->alpha_plane)) {
+      if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size, io,
+                    dec->alpha_plane)) {
        VP8LDecoder* const vp8l_dec = dec->alph_dec->vp8l_dec;
-        VP8SetError(dec,
-                    (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
-                                       : vp8l_dec->status,
-                    "Alpha decoder initialization failed.");
+        VP8SetError(
+            dec,
+            (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY : vp8l_dec->status,
+            "Alpha decoder initialization failed.");
        goto Error;
      }
      // if we allowed use of alpha dithering, check whether it's needed at all
      if (dec->alph_dec->pre_processing != ALPHA_PREPROCESSED_LEVELS) {
-        dec->alpha_dithering = 0;    // disable dithering
+        dec->alpha_dithering = 0;  // disable dithering
      } else {
-        num_rows = height - row;     // decode everything in one pass
+        num_rows = height - row;  // decode everything in one pass
      }
    }

@@ -218,16 +217,15 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
    assert(row + num_rows <= height);
    if (!ALPHDecode(dec, row, num_rows)) goto Error;

-    if (dec->is_alpha_decoded) {   // finished?
+    if (dec->is_alpha_decoded) {  // finished?
      ALPHDelete(dec->alph_dec);
      dec->alph_dec = NULL;
      if (dec->alpha_dithering > 0) {
-        uint8_t* const alpha = dec->alpha_plane + io->crop_top * width
-                             + io->crop_left;
-        if (!WebPDequantizeLevels(alpha,
-                                  io->crop_right - io->crop_left,
-                                  io->crop_bottom - io->crop_top,
-                                  width, dec->alpha_dithering)) {
+        uint8_t* const alpha =
+            dec->alpha_plane + io->crop_top * width + io->crop_left;
+        if (!WebPDequantizeLevels(alpha, io->crop_right - io->crop_left,
+                                  io->crop_bottom - io->crop_top, width,
+                                  dec->alpha_dithering)) {
          goto Error;
        }
      }
@@ -237,7 +235,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
  // Return a pointer to the current decoded row.
  return dec->alpha_plane + row * width;

- Error:
+Error:
  WebPDeallocateAlphaMemory(dec);
  return NULL;
 }
--- a/src/dec/alphai_dec.h
+++ b/src/dec/alphai_dec.h
@@ -15,10 +15,10 @@
 #define WEBP_DEC_ALPHAI_DEC_H_

 #include "src/dec/vp8_dec.h"
-#include "src/webp/types.h"
 #include "src/dec/webpi_dec.h"
 #include "src/dsp/dsp.h"
 #include "src/utils/filters_utils.h"
+#include "src/webp/types.h"

 #ifdef __cplusplus
 extern "C" {
@@ -35,11 +35,11 @@ struct ALPHDecoder {
  int pre_processing;
  struct VP8LDecoder* vp8l_dec;
  VP8Io io;
-  int use_8b_decode;   // Although alpha channel requires only 1 byte per
-                       // pixel, sometimes VP8LDecoder may need to allocate
-                       // 4 bytes per pixel internally during decode.
+  int use_8b_decode;  // Although alpha channel requires only 1 byte per
+                      // pixel, sometimes VP8LDecoder may need to allocate
+                      // 4 bytes per pixel internally during decode.
  uint8_t* output;
-  const uint8_t* prev_line;   // last output row (or NULL)
+  const uint8_t* prev_line;  // last output row (or NULL)
 };

 //------------------------------------------------------------------------------
@@ -51,7 +51,7 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
 //------------------------------------------------------------------------------

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DEC_ALPHAI_DEC_H_
--- a/src/dec/buffer_dec.c
+++ b/src/dec/buffer_dec.c
@@ -26,10 +26,9 @@
 // WebPDecBuffer

 // Number of bytes per pixel for the different color-spaces.
-static const uint8_t kModeBpp[MODE_LAST] = {
-  3, 4, 3, 4, 4, 2, 2,
-  4, 4, 4, 2,    // pre-multiplied modes
-  1, 1 };
+static const uint8_t kModeBpp[MODE_LAST] = {3, 4, 3, 4, 4, 2, 2,  //
+                                            4, 4, 4, 2,  // pre-multiplied modes
+                                            1, 1};

 // Convert to an integer to handle both the unsigned/signed enum cases
 // without the need for casting to remove type limit warnings.
@@ -39,8 +38,8 @@ int IsValidColorspace(int webp_csp_mode) {

 // strictly speaking, the very last (or first, if flipped) row
 // doesn't require padding.
-#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE)       \
-    ((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
+#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
+  ((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))

 static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
  int ok = 1;
@@ -49,9 +48,9 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
  const int height = buffer->height;
  if (!IsValidColorspace(mode)) {
    ok = 0;
-  } else if (!WebPIsRGBMode(mode)) {   // YUV checks
+  } else if (!WebPIsRGBMode(mode)) {  // YUV checks
    const WebPYUVABuffer* const buf = &buffer->u.YUVA;
-    const int uv_width  = (width  + 1) / 2;
+    const int uv_width = (width + 1) / 2;
    const int uv_height = (height + 1) / 2;
    const int y_stride = abs(buf->y_stride);
    const int u_stride = abs(buf->u_stride);
@@ -75,7 +74,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
      ok &= (a_size <= buf->a_size);
      ok &= (buf->a != NULL);
    }
-  } else {    // RGB checks
+  } else {  // RGB checks
    const WebPRGBABuffer* const buf = &buffer->u.RGBA;
    const int stride = abs(buf->stride);
    const uint64_t size =
@@ -127,7 +126,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
    }
    buffer->private_memory = output;

-    if (!WebPIsRGBMode(mode)) {   // YUVA initialization
+    if (!WebPIsRGBMode(mode)) {  // YUVA initialization
      WebPYUVABuffer* const buf = &buffer->u.YUVA;
      buf->y = output;
      buf->y_stride = stride;
@@ -185,14 +184,14 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
  if (buffer == NULL || width <= 0 || height <= 0) {
    return VP8_STATUS_INVALID_PARAM;
  }
-  if (options != NULL) {    // First, apply options if there is any.
+  if (options != NULL) {  // First, apply options if there is any.
    if (options->use_cropping) {
      const int cw = options->crop_width;
      const int ch = options->crop_height;
      const int x = options->crop_left & ~1;
      const int y = options->crop_top & ~1;
      if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) {
-        return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
+        return VP8_STATUS_INVALID_PARAM;  // out of frame boundary.
      }
      width = cw;
      height = ch;
@@ -202,14 +201,14 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
 #if !defined(WEBP_REDUCE_SIZE)
      int scaled_width = options->scaled_width;
      int scaled_height = options->scaled_height;
-      if (!WebPRescalerGetScaledDimensions(
-              width, height, &scaled_width, &scaled_height)) {
+      if (!WebPRescalerGetScaledDimensions(width, height, &scaled_width,
+                                           &scaled_height)) {
        return VP8_STATUS_INVALID_PARAM;
      }
      width = scaled_width;
      height = scaled_height;
 #else
-      return VP8_STATUS_INVALID_PARAM;   // rescaling not supported
+      return VP8_STATUS_INVALID_PARAM;  // rescaling not supported
 #endif
    }
  }
@@ -253,7 +252,7 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
  if (src != NULL && dst != NULL) {
    *dst = *src;
    if (src->private_memory != NULL) {
-      dst->is_external_memory = 1;   // dst buffer doesn't own the memory.
+      dst->is_external_memory = 1;  // dst buffer doesn't own the memory.
      dst->private_memory = NULL;
    }
  }
@@ -264,7 +263,7 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
  if (src != NULL && dst != NULL) {
    *dst = *src;
    if (src->private_memory != NULL) {
-      src->is_external_memory = 1;   // src relinquishes ownership
+      src->is_external_memory = 1;  // src relinquishes ownership
      src->private_memory = NULL;
    }
  }
@@ -289,8 +288,8 @@ VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
  } else {
    const WebPYUVABuffer* const src = &src_buf->u.YUVA;
    const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
-    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
-                  src_buf->width, src_buf->height);
+    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride, src_buf->width,
+                  src_buf->height);
    WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
                  (src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
    WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,
--- a/src/dec/common_dec.h
+++ b/src/dec/common_dec.h
@@ -15,41 +15,46 @@
 #define WEBP_DEC_COMMON_DEC_H_

 // intra prediction modes
-enum { B_DC_PRED = 0,   // 4x4 modes
-       B_TM_PRED = 1,
-       B_VE_PRED = 2,
-       B_HE_PRED = 3,
-       B_RD_PRED = 4,
-       B_VR_PRED = 5,
-       B_LD_PRED = 6,
-       B_VL_PRED = 7,
-       B_HD_PRED = 8,
-       B_HU_PRED = 9,
-       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+enum {
+  B_DC_PRED = 0,  // 4x4 modes
+  B_TM_PRED = 1,
+  B_VE_PRED = 2,
+  B_HE_PRED = 3,
+  B_RD_PRED = 4,
+  B_VR_PRED = 5,
+  B_LD_PRED = 6,
+  B_VL_PRED = 7,
+  B_HD_PRED = 8,
+  B_HU_PRED = 9,
+  NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10

-       // Luma16 or UV modes
-       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
-       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
-       B_PRED = NUM_BMODES,   // refined I4x4 mode
-       NUM_PRED_MODES = 4,
+  // Luma16 or UV modes
+  DC_PRED = B_DC_PRED,
+  V_PRED = B_VE_PRED,
+  H_PRED = B_HE_PRED,
+  TM_PRED = B_TM_PRED,
+  B_PRED = NUM_BMODES,  // refined I4x4 mode
+  NUM_PRED_MODES = 4,

-       // special modes
-       B_DC_PRED_NOTOP = 4,
-       B_DC_PRED_NOLEFT = 5,
-       B_DC_PRED_NOTOPLEFT = 6,
-       NUM_B_DC_MODES = 7 };
+  // special modes
+  B_DC_PRED_NOTOP = 4,
+  B_DC_PRED_NOLEFT = 5,
+  B_DC_PRED_NOTOPLEFT = 6,
+  NUM_B_DC_MODES = 7
+};

-enum { MB_FEATURE_TREE_PROBS = 3,
-       NUM_MB_SEGMENTS = 4,
-       NUM_REF_LF_DELTAS = 4,
-       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
-       MAX_NUM_PARTITIONS = 8,
-       // Probabilities
-       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
-       NUM_BANDS = 8,
-       NUM_CTX = 3,
-       NUM_PROBAS = 11
-     };
+enum {
+  MB_FEATURE_TREE_PROBS = 3,
+  NUM_MB_SEGMENTS = 4,
+  NUM_REF_LF_DELTAS = 4,
+  NUM_MODE_LF_DELTAS = 4,  // I4x4, ZERO, *, SPLIT
+  MAX_NUM_PARTITIONS = 8,
+  // Probabilities
+  NUM_TYPES = 4,  // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+  NUM_BANDS = 8,
+  NUM_CTX = 3,
+  NUM_PROBAS = 11
+};

 // Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
 int IsValidColorspace(int webp_csp_mode);
--- a/src/dec/frame_dec.c
+++ b/src/dec/frame_dec.c
@@ -30,11 +30,10 @@
 // Main reconstruction function.

 static const uint16_t kScan[16] = {
-  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
-  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
-  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
-  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
-};
+    0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS,
+    0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
+    0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS,  12 + 8 * BPS,
+    0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS};

 static int CheckMode(int mb_x, int mb_y, int mode) {
  if (mode == B_DC_PRED) {
@@ -70,9 +69,9 @@ static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,

 static void DoUVTransform(uint32_t bits, const int16_t* const src,
                          uint8_t* const dst) {
-  if (bits & 0xff) {    // any non-zero coeff at all?
-    if (bits & 0xaa) {  // any non-zero AC coefficient?
-      VP8TransformUV(src, dst);   // note we don't use the AC3 variant for U/V
+  if (bits & 0xff) {             // any non-zero coeff at all?
+    if (bits & 0xaa) {           // any non-zero AC coefficient?
+      VP8TransformUV(src, dst);  // note we don't use the AC3 variant for U/V
    } else {
      VP8TransformDCUV(src, dst);
    }
@@ -138,11 +137,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
      }

      // predict and add residuals
-      if (block->is_i4x4) {   // 4x4
+      if (block->is_i4x4) {  // 4x4
        uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

        if (mb_y > 0) {
-          if (mb_x >= dec->mb_w - 1) {    // on rightmost border
+          if (mb_x >= dec->mb_w - 1) {  // on rightmost border
            memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
          } else {
            memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
@@ -157,7 +156,7 @@ static void ReconstructRow(const VP8Decoder* const dec,
          VP8PredLuma4[block->imodes[n]](dst);
          DoTransform(bits, coeffs + n * 16, dst);
        }
-      } else {    // 16x16
+      } else {  // 16x16
        const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);
        VP8PredLuma16[pred_func](y_dst);
        if (bits != 0) {
@@ -179,8 +178,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
      // stash away top samples for next block
      if (mb_y < dec->mb_h - 1) {
        memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
-        memcpy(top_yuv[0].u, u_dst +  7 * BPS,  8);
-        memcpy(top_yuv[0].v, v_dst +  7 * BPS,  8);
+        memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
+        memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
      }
    }
    // Transfer reconstructed samples from yuv_b cache to final destination.
@@ -209,7 +208,7 @@ static void ReconstructRow(const VP8Decoder* const dec,
 // Simple filter:  up to 2 luma samples are read and 1 is written.
 // Complex filter: up to 4 luma samples are read and 3 are written. Same for
 //                 U/V, so it's 8 samples total (because of the 2x upsampling).
-static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
+static const uint8_t kFilterExtraRows[3] = {0, 2, 8};

 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
  const VP8ThreadContext* const ctx = &dec->thread_ctx;
@@ -223,7 +222,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
    return;
  }
  assert(limit >= 3);
-  if (dec->filter_type == 1) {   // simple
+  if (dec->filter_type == 1) {  // simple
    if (mb_x > 0) {
      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
    }
@@ -236,7 +235,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
    if (f_info->f_inner) {
      VP8SimpleVFilter16i(y_dst, y_bps, limit);
    }
-  } else {    // complex
+  } else {  // complex
    const int uv_bps = dec->cache_uv_stride;
    uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
    uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
@@ -332,9 +331,8 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {

 #define DITHER_AMP_TAB_SIZE 12
 static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
-  // roughly, it's dqm->uv_mat[1]
-  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
-};
+    // roughly, it's dqm->uv_mat[1]
+    8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1};

 void VP8InitDithering(const WebPDecoderOptions* const options,
                      VP8Decoder* const dec) {
@@ -407,7 +405,7 @@ static void DitherRow(VP8Decoder* const dec) {
 //  * we must clip the remaining pixels against the cropping area. The VP8Io
 //    struct must have the following fields set correctly before calling put():

-#define MACROBLOCK_VPOS(mb_y)  ((mb_y) * 16)    // vertical position of a MB
+#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16)  // vertical position of a MB

 // Finalize and transmit a complete row. Return false in case of user-abort.
 static int FinishRow(void* arg1, void* arg2) {
@@ -458,7 +456,7 @@ static int FinishRow(void* arg1, void* arg2) {
      y_end -= extra_y_rows;
    }
    if (y_end > io->crop_bottom) {
-      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
+      y_end = io->crop_bottom;  // make sure we don't overflow on last row.
    }
    // If dec->alpha_data is not NULL, we have some alpha plane present.
    io->a = NULL;
@@ -512,9 +510,9 @@ static int FinishRow(void* arg1, void* arg2) {
 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  int ok = 1;
  VP8ThreadContext* const ctx = &dec->thread_ctx;
-  const int filter_row =
-      (dec->filter_type > 0) &&
-      (dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);
+  const int filter_row = (dec->filter_type > 0) &&
+                         (dec->mb_y >= dec->tl_mb_y) &&
+                         (dec->mb_y <= dec->br_mb_y);
  if (dec->mt_method == 0) {
    // ctx->id and ctx->f_info are already set
    ctx->mb_y = dec->mb_y;
@@ -526,7 +524,7 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
    // Finish previous job *before* updating context
    ok &= WebPGetWorkerInterface()->Sync(worker);
    assert(worker->status == OK);
-    if (ok) {   // spawn a new deblocking/output job
+    if (ok) {  // spawn a new deblocking/output job
      ctx->io = *io;
      ctx->id = dec->cache_id;
      ctx->mb_y = dec->mb_y;
@@ -539,7 +537,7 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
        // perform reconstruction directly in main thread
        ReconstructRow(dec, ctx);
      }
-      if (filter_row) {            // swap filter info
+      if (filter_row) {  // swap filter info
        VP8FInfo* const tmp = ctx->f_info;
        ctx->f_info = dec->f_info;
        dec->f_info = tmp;
@@ -646,7 +644,7 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
 // io->put:         [ 0..15][16..31][ 0..15][...

 #define MT_CACHE_LINES 3
-#define ST_CACHE_LINES 1   // 1 cache row only for single-threaded case
+#define ST_CACHE_LINES 1  // 1 cache row only for single-threaded case

 // Initialize multi/single-thread worker
 static int InitThreadContext(VP8Decoder* const dec) {
@@ -669,8 +667,8 @@ static int InitThreadContext(VP8Decoder* const dec) {
 }

 int VP8GetThreadMethod(const WebPDecoderOptions* const options,
-                       const WebPHeaderStructure* const headers,
-                       int width, int height) {
+                       const WebPHeaderStructure* const headers, int width,
+                       int height) {
  if (options == NULL || options->use_threads == 0) {
    return 0;
  }
@@ -698,22 +696,23 @@ static int AllocateMemory(VP8Decoder* const dec) {
  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
  const size_t f_info_size =
-      (dec->filter_type > 0) ?
-          mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
-        : 0;
+      (dec->filter_type > 0)
+          ? mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
+          : 0;
  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
  const size_t mb_data_size =
      (dec->mt_method == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data);
-  const size_t cache_height = (16 * num_caches
-                            + kFilterExtraRows[dec->filter_type]) * 3 / 2;
+  const size_t cache_height =
+      (16 * num_caches + kFilterExtraRows[dec->filter_type]) * 3 / 2;
  const size_t cache_size = top_size * cache_height;
  // alpha_size is the only one that scales as width x height.
-  const uint64_t alpha_size = (dec->alpha_data != NULL) ?
-      (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;
-  const uint64_t needed = (uint64_t)intra_pred_mode_size
-                        + top_size + mb_info_size + f_info_size
-                        + yuv_size + mb_data_size
-                        + cache_size + alpha_size + WEBP_ALIGN_CST;
+  const uint64_t alpha_size =
+      (dec->alpha_data != NULL)
+          ? (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height
+          : 0ULL;
+  const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size +
+                          mb_info_size + f_info_size + yuv_size + mb_data_size +
+                          cache_size + alpha_size + WEBP_ALIGN_CST;
  uint8_t* mem;

  if (!CheckSizeOverflow(needed)) return 0;  // check for overflow
@@ -769,10 +768,10 @@ static int AllocateMemory(VP8Decoder* const dec) {
    const int extra_y = extra_rows * dec->cache_y_stride;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
    dec->cache_y = mem + extra_y;
-    dec->cache_u = dec->cache_y
-                  + 16 * num_caches * dec->cache_y_stride + extra_uv;
-    dec->cache_v = dec->cache_u
-                  + 8 * num_caches * dec->cache_uv_stride + extra_uv;
+    dec->cache_u =
+        dec->cache_y + 16 * num_caches * dec->cache_y_stride + extra_uv;
+    dec->cache_v =
+        dec->cache_u + 8 * num_caches * dec->cache_uv_stride + extra_uv;
    dec->cache_id = 0;
  }
  mem += cache_size;
@@ -784,7 +783,7 @@ static int AllocateMemory(VP8Decoder* const dec) {

  // note: left/top-info is initialized once for all.
  memset(dec->mb_info - 1, 0, mb_info_size);
-  VP8InitScanline(dec);   // initialize left too.
+  VP8InitScanline(dec);  // initialize left too.

  // initialize top
  memset(dec->intra_t, B_DC_PRED, intra_pred_mode_size);
--- a/src/dec/idec_dec.c
+++ b/src/dec/idec_dec.c
@@ -51,11 +51,7 @@ typedef enum {
 } DecState;

 // Operating state for the MemBuffer
-typedef enum {
-  MEM_MODE_NONE = 0,
-  MEM_MODE_APPEND,
-  MEM_MODE_MAP
-} MemBufferMode;
+typedef enum { MEM_MODE_NONE = 0, MEM_MODE_APPEND, MEM_MODE_MAP } MemBufferMode;

 // storage for partition #0 and partial data (in a rolling fashion)
 typedef struct {
@@ -70,19 +66,19 @@ typedef struct {
 } MemBuffer;

 struct WebPIDecoder {
-  DecState state;         // current decoding state
-  WebPDecParams params;   // Params to store output info
-  int is_lossless;        // for down-casting 'dec'.
-  void* dec;              // either a VP8Decoder or a VP8LDecoder instance
+  DecState state;        // current decoding state
+  WebPDecParams params;  // Params to store output info
+  int is_lossless;       // for down-casting 'dec'.
+  void* dec;             // either a VP8Decoder or a VP8LDecoder instance
  VP8Io io;

-  MemBuffer mem;          // input memory buffer.
-  WebPDecBuffer output;   // output buffer (when no external one is supplied,
-                          // or if the external one has slow-memory)
+  MemBuffer mem;         // input memory buffer.
+  WebPDecBuffer output;  // output buffer (when no external one is supplied,
+                         // or if the external one has slow-memory)
  WebPDecBuffer* final_output;  // Slow-memory output to copy to eventually.
-  size_t chunk_size;      // Compressed VP8/VP8L size extracted from Header.
+  size_t chunk_size;  // Compressed VP8/VP8L size extracted from Header.

-  int last_mb_y;          // last row reached for intra-mode decoding
+  int last_mb_y;  // last row reached for intra-mode decoding
 };

 // MB context to restore in case VP8DecodeMB() fails
@@ -165,7 +161,7 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
          }
        }
      }
-    } else {    // Resize lossless bitreader
+    } else {  // Resize lossless bitreader
      VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
      VP8LBitReaderSetBuffer(&dec->br, new_base, MemDataSize(mem));
    }
@@ -237,10 +233,10 @@ WEBP_NODISCARD static int RemapMemBuffer(WebPIDecoder* const idec,
 }

 static void InitMemBuffer(MemBuffer* const mem) {
-  mem->mode       = MEM_MODE_NONE;
-  mem->buf        = NULL;
-  mem->buf_size   = 0;
-  mem->part0_buf  = NULL;
+  mem->mode = MEM_MODE_NONE;
+  mem->buf = NULL;
+  mem->buf_size = 0;
+  mem->part0_buf = NULL;
  mem->part0_size = 0;
 }

@@ -255,11 +251,11 @@ static void ClearMemBuffer(MemBuffer* const mem) {
 WEBP_NODISCARD static int CheckMemBufferMode(MemBuffer* const mem,
                                             MemBufferMode expected) {
  if (mem->mode == MEM_MODE_NONE) {
-    mem->mode = expected;    // switch to the expected mode
+    mem->mode = expected;  // switch to the expected mode
  } else if (mem->mode != expected) {
-    return 0;         // we mixed the modes => error
+    return 0;  // we mixed the modes => error
  }
-  assert(mem->mode == expected);   // mode is ok
+  assert(mem->mode == expected);  // mode is ok
  return 1;
 }

@@ -396,7 +392,7 @@ static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
  assert(mem->part0_buf == NULL);
  // the following is a format limitation, no need for runtime check:
  assert(part_size <= mem->part0_size);
-  if (part_size == 0) {   // can't have zero-size partition #0
+  if (part_size == 0) {  // can't have zero-size partition #0
    return VP8_STATUS_BITSTREAM_ERROR;
  }
  if (mem->mode == MEM_MODE_APPEND) {
@@ -437,14 +433,14 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
  }

  // Allocate/Verify output buffer now
-  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
-                                      output);
+  dec->status =
+      WebPAllocateDecBuffer(io->width, io->height, params->options, output);
  if (dec->status != VP8_STATUS_OK) {
    return IDecError(idec, dec->status);
  }
  // This change must be done before calling VP8InitFrame()
-  dec->mt_method = VP8GetThreadMethod(params->options, NULL,
-                                      io->width, io->height);
+  dec->mt_method =
+      VP8GetThreadMethod(params->options, NULL, io->width, io->height);
  VP8InitDithering(params->options, dec);

  dec->status = CopyParts0Data(idec);
@@ -512,7 +508,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
        assert(idec->mem.start <= idec->mem.end);
      }
    }
-    VP8InitScanline(dec);   // Prepare for next scanline
+    VP8InitScanline(dec);  // Prepare for next scanline

    // Reconstruct, filter and emit the row.
    if (!VP8ProcessRow(dec, io)) {
@@ -558,8 +554,8 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
    return ErrorStatusLossless(idec, dec->status);
  }
  // Allocate/verify output buffer now.
-  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
-                                      output);
+  dec->status =
+      WebPAllocateDecBuffer(io->width, io->height, params->options, output);
  if (dec->status != VP8_STATUS_OK) {
    return IDecError(idec, dec->status);
  }
@@ -584,7 +580,7 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
                                               : FinishDecoding(idec);
 }

-  // Main decoding loop
+// Main decoding loop
 static VP8StatusCode IDecode(WebPIDecoder* idec) {
  VP8StatusCode status = VP8_STATUS_SUSPENDED;

@@ -592,7 +588,7 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
    status = DecodeWebPHeaders(idec);
  } else {
    if (idec->dec == NULL) {
-      return VP8_STATUS_SUSPENDED;    // can't continue if we have no decoder.
+      return VP8_STATUS_SUSPENDED;  // can't continue if we have no decoder.
    }
  }
  if (idec->state == STATE_VP8_HEADER) {
@@ -718,12 +714,12 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
  WebPIDecoder* idec;

  if (csp >= MODE_YUV) return NULL;
-  if (is_external_memory == 0) {    // Overwrite parameters to sane values.
+  if (is_external_memory == 0) {  // Overwrite parameters to sane values.
    output_buffer_size = 0;
    output_stride = 0;
  } else {  // A buffer was passed. Validate the other params.
    if (output_stride == 0 || output_buffer_size == 0) {
-      return NULL;   // invalid parameter.
+      return NULL;  // invalid parameter.
    }
  }
  idec = WebPINewDecoder(NULL);
@@ -737,14 +733,14 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
 }

 WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
-                           uint8_t* u, size_t u_size, int u_stride,
-                           uint8_t* v, size_t v_size, int v_stride,
-                           uint8_t* a, size_t a_size, int a_stride) {
+                           uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
+                           size_t v_size, int v_stride, uint8_t* a,
+                           size_t a_size, int a_stride) {
  const int is_external_memory = (luma != NULL) ? 1 : 0;
  WebPIDecoder* idec;
  WEBP_CSP_MODE colorspace;

-  if (is_external_memory == 0) {    // Overwrite parameters to sane values.
+  if (is_external_memory == 0) {  // Overwrite parameters to sane values.
    luma_size = u_size = v_size = a_size = 0;
    luma_stride = u_stride = v_stride = a_stride = 0;
    u = v = a = NULL;
@@ -780,12 +776,10 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
 }

 WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
-                          uint8_t* u, size_t u_size, int u_stride,
-                          uint8_t* v, size_t v_size, int v_stride) {
-  return WebPINewYUVA(luma, luma_size, luma_stride,
-                      u, u_size, u_stride,
-                      v, v_size, v_stride,
-                      NULL, 0, 0);
+                          uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
+                          size_t v_size, int v_stride) {
+  return WebPINewYUVA(luma, luma_size, luma_stride, u, u_size, u_stride, v,
+                      v_size, v_stride, NULL, 0, 0);
 }

 //------------------------------------------------------------------------------
@@ -801,8 +795,8 @@ static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
  return VP8_STATUS_SUSPENDED;
 }

-VP8StatusCode WebPIAppend(WebPIDecoder* idec,
-                          const uint8_t* data, size_t data_size) {
+VP8StatusCode WebPIAppend(WebPIDecoder* idec, const uint8_t* data,
+                          size_t data_size) {
  VP8StatusCode status;
  if (idec == NULL || data == NULL) {
    return VP8_STATUS_INVALID_PARAM;
@@ -822,8 +816,8 @@ VP8StatusCode WebPIAppend(WebPIDecoder* idec,
  return IDecode(idec);
 }

-VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
-                          const uint8_t* data, size_t data_size) {
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec, const uint8_t* data,
+                          size_t data_size) {
  VP8StatusCode status;
  if (idec == NULL || data == NULL) {
    return VP8_STATUS_INVALID_PARAM;
@@ -853,14 +847,13 @@ static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
    return NULL;
  }
  if (idec->final_output != NULL) {
-    return NULL;   // not yet slow-copied
+    return NULL;  // not yet slow-copied
  }
  return idec->params.output;
 }

-const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
-                                      int* left, int* top,
-                                      int* width, int* height) {
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec, int* left,
+                                      int* top, int* width, int* height) {
  const WebPDecBuffer* const src = GetOutputBuffer(idec);
  if (left != NULL) *left = 0;
  if (top != NULL) *top = 0;
@@ -913,10 +906,8 @@ WEBP_NODISCARD uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
  return src->u.YUVA.y;
 }

-int WebPISetIOHooks(WebPIDecoder* const idec,
-                    VP8IoPutHook put,
-                    VP8IoSetupHook setup,
-                    VP8IoTeardownHook teardown,
+int WebPISetIOHooks(WebPIDecoder* const idec, VP8IoPutHook put,
+                    VP8IoSetupHook setup, VP8IoTeardownHook teardown,
                    void* user_data) {
  if (idec == NULL || idec->state > STATE_WEBP_HEADER) {
    return 0;
--- a/src/dec/io_dec.c
+++ b/src/dec/io_dec.c
@@ -17,7 +17,6 @@
 #include <string.h>

 #include "src/dec/vp8_dec.h"
-#include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/webpi_dec.h"
 #include "src/dsp/cpu.h"
@@ -26,6 +25,7 @@
 #include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/decode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -51,9 +51,8 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
  WebPDecBuffer* const output = p->output;
  WebPRGBABuffer* const buf = &output->u.RGBA;
  uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
-  WebPSamplerProcessPlane(io->y, io->y_stride,
-                          io->u, io->v, io->uv_stride,
-                          dst, buf->stride, io->mb_w, io->mb_h,
+  WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst,
+                          buf->stride, io->mb_w, io->mb_h,
                          WebPSamplers[output->colorspace]);
  return io->mb_h;
 }
@@ -63,7 +62,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {

 #ifdef FANCY_UPSAMPLING
 static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
-  int num_lines_out = io->mb_h;   // a priori guess
+  int num_lines_out = io->mb_h;  // a priori guess
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
  uint8_t* dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
  WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
@@ -82,8 +81,8 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
  } else {
    // We can finish the left-over line from previous call.
-    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
-             dst - buf->stride, dst, mb_w);
+    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride,
+             dst, mb_w);
    ++num_lines_out;
  }
  // Loop over each output pairs of row.
@@ -94,8 +93,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
    cur_v += io->uv_stride;
    dst += 2 * buf->stride;
    cur_y += 2 * io->y_stride;
-    upsample(cur_y - io->y_stride, cur_y,
-             top_u, top_v, cur_u, cur_v,
+    upsample(cur_y - io->y_stride, cur_y, top_u, top_v, cur_u, cur_v,
             dst - buf->stride, dst, mb_w);
  }
  // move to last row
@@ -111,14 +109,14 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
  } else {
    // Process the very last row of even-sized picture
    if (!(y_end & 1)) {
-      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
-               dst + buf->stride, NULL, mb_w);
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst + buf->stride, NULL,
+               mb_w);
    }
  }
  return num_lines_out;
 }

-#endif    /* FANCY_UPSAMPLING */
+#endif /* FANCY_UPSAMPLING */

 //------------------------------------------------------------------------------

@@ -153,8 +151,8 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
  return 0;
 }

-static int GetAlphaSourceRow(const VP8Io* const io,
-                             const uint8_t** alpha, int* const num_rows) {
+static int GetAlphaSourceRow(const VP8Io* const io, const uint8_t** alpha,
+                             int* const num_rows) {
  int start_y = io->mb_y;
  *num_rows = io->mb_h;

@@ -192,14 +190,14 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
    uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
    uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
-    const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
-                                            num_rows, dst, buf->stride);
+    const int has_alpha =
+        WebPDispatchAlpha(alpha, io->width, mb_w, num_rows, dst, buf->stride);
    (void)expected_num_lines_out;
    assert(expected_num_lines_out == num_rows);
    // has_alpha is true if there's non-trivial alpha to premultiply with.
    if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
-      WebPApplyAlphaMultiply(base_rgba, alpha_first,
-                             mb_w, num_rows, buf->stride);
+      WebPApplyAlphaMultiply(base_rgba, alpha_first, mb_w, num_rows,
+                             buf->stride);
    }
  }
  return 0;
@@ -245,14 +243,14 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
 // YUV rescaling (no final RGB conversion needed)

 #if !defined(WEBP_REDUCE_SIZE)
-static int Rescale(const uint8_t* src, int src_stride,
-                   int new_lines, WebPRescaler* const wrk) {
+static int Rescale(const uint8_t* src, int src_stride, int new_lines,
+                   WebPRescaler* const wrk) {
  int num_lines_out = 0;
-  while (new_lines > 0) {    // import new contributions of source rows.
+  while (new_lines > 0) {  // import new contributions of source rows.
    const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
    src += lines_in * src_stride;
    new_lines -= lines_in;
-    num_lines_out += WebPRescalerExport(wrk);    // emit output row(s)
+    num_lines_out += WebPRescalerExport(wrk);  // emit output row(s)
  }
  return num_lines_out;
 }
@@ -267,8 +265,8 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
    // internal buffer. This is OK since these samples are not used for
    // intra-prediction (the top samples are saved in cache_y/u/v).
    // But we need to cast the const away, though.
-    WebPMultRows((uint8_t*)io->y, io->y_stride,
-                 io->a, io->width, io->mb_w, mb_h, 0);
+    WebPMultRows((uint8_t*)io->y, io->y_stride, io->a, io->width, io->mb_w,
+                 mb_h, 0);
  }
  num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
  Rescale(io->u, io->uv_stride, uv_mb_h, p->scaler_u);
@@ -284,7 +282,7 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
    uint8_t* const dst_y = buf->y + (ptrdiff_t)p->last_y * buf->y_stride;
    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
    assert(expected_num_lines_out == num_lines_out);
-    if (num_lines_out > 0) {   // unmultiply the Y
+    if (num_lines_out > 0) {  // unmultiply the Y
      WebPMultRows(dst_y, buf->y_stride, dst_a, buf->a_stride,
                   p->scaler_a->dst_width, num_lines_out, 1);
    }
@@ -300,11 +298,11 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
 static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
-  const int out_width  = io->scaled_width;
+  const int out_width = io->scaled_width;
  const int out_height = io->scaled_height;
-  const int uv_out_width  = (out_width + 1) >> 1;
+  const int uv_out_width = (out_width + 1) >> 1;
  const int uv_out_height = (out_height + 1) >> 1;
-  const int uv_in_width  = (io->mb_w + 1) >> 1;
+  const int uv_in_width = (io->mb_w + 1) >> 1;
  const int uv_in_height = (io->mb_h + 1) >> 1;
  // scratch memory for luma rescaler
  const size_t work_size = 2 * (size_t)out_width;
@@ -327,33 +325,32 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {

  p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
  if (p->memory == NULL) {
-    return 0;   // memory error
+    return 0;  // memory error
  }
  work = (rescaler_t*)p->memory;

-  scalers = (WebPRescaler*)WEBP_ALIGN(
-      (const uint8_t*)work + total_size - rescaler_size);
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
+                                      rescaler_size);
  p->scaler_y = &scalers[0];
  p->scaler_u = &scalers[1];
  p->scaler_v = &scalers[2];
  p->scaler_a = has_alpha ? &scalers[3] : NULL;

-  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
-                        buf->y, out_width, out_height, buf->y_stride, 1,
-                        work) ||
-      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
-                        buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width,
+                        out_height, buf->y_stride, 1, work) ||
+      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, buf->u,
+                        uv_out_width, uv_out_height, buf->u_stride, 1,
                        work + work_size) ||
-      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
-                        buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, buf->v,
+                        uv_out_width, uv_out_height, buf->v_stride, 1,
                        work + work_size + uv_work_size)) {
    return 0;
  }
  p->emit = EmitRescaledYUV;

  if (has_alpha) {
-    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
-                          buf->a, out_width, out_height, buf->a_stride, 1,
+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width,
+                          out_height, buf->a_stride, 1,
                          work + work_size + 2 * uv_work_size)) {
      return 0;
    }
@@ -381,8 +378,8 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
    WebPRescalerExportRow(p->scaler_y);
    WebPRescalerExportRow(p->scaler_u);
    WebPRescalerExportRow(p->scaler_v);
-    convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst,
-            dst, p->scaler_y->dst_width);
+    convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst, dst,
+            p->scaler_y->dst_width);
    dst += buf->stride;
    ++num_lines_out;
  }
@@ -406,7 +403,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
      const int v_lines_in = WebPRescalerImport(
          p->scaler_v, uv_mb_h - uv_j, io->v + (ptrdiff_t)uv_j * io->uv_stride,
          io->uv_stride);
-      (void)v_lines_in;   // remove a gcc warning
+      (void)v_lines_in;  // remove a gcc warning
      assert(u_lines_in == v_lines_in);
      uv_j += u_lines_in;
    }
@@ -419,8 +416,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
  uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
  const WEBP_CSP_MODE colorspace = p->output->colorspace;
-  const int alpha_first =
-      (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+  const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb);
  uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
  int num_lines_out = 0;
  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
@@ -436,8 +432,8 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
    ++num_lines_out;
  }
  if (is_premult_alpha && non_opaque) {
-    WebPApplyAlphaMultiply(base_rgba, alpha_first,
-                           width, num_lines_out, buf->stride);
+    WebPApplyAlphaMultiply(base_rgba, alpha_first, width, num_lines_out,
+                           buf->stride);
  }
  return num_lines_out;
 }
@@ -495,14 +491,14 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,

 static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
-  const int out_width  = io->scaled_width;
+  const int out_width = io->scaled_width;
  const int out_height = io->scaled_height;
-  const int uv_in_width  = (io->mb_w + 1) >> 1;
+  const int uv_in_width = (io->mb_w + 1) >> 1;
  const int uv_in_height = (io->mb_h + 1) >> 1;
  // scratch memory for one rescaler
  const size_t work_size = 2 * (size_t)out_width;
  rescaler_t* work;  // rescalers work area
-  uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
+  uint8_t* tmp;  // tmp storage for scaled YUV444 samples before RGB conversion
  uint64_t tmp_size1, tmp_size2, total_size;
  size_t rescaler_size;
  WebPRescaler* scalers;
@@ -519,21 +515,20 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {

  p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
  if (p->memory == NULL) {
-    return 0;   // memory error
+    return 0;  // memory error
  }
  work = (rescaler_t*)p->memory;
  tmp = (uint8_t*)(work + tmp_size1);

-  scalers = (WebPRescaler*)WEBP_ALIGN(
-      (const uint8_t*)work + total_size - rescaler_size);
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
+                                      rescaler_size);
  p->scaler_y = &scalers[0];
  p->scaler_u = &scalers[1];
  p->scaler_v = &scalers[2];
  p->scaler_a = has_alpha ? &scalers[3] : NULL;

-  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
-                        tmp + 0 * out_width, out_width, out_height, 0, 1,
-                        work + 0 * work_size) ||
+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width,
+                        out_width, out_height, 0, 1, work + 0 * work_size) ||
      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
                        tmp + 1 * out_width, out_width, out_height, 0, 1,
                        work + 1 * work_size) ||
@@ -546,9 +541,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
  WebPInitYUV444Converters();

  if (has_alpha) {
-    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
-                          tmp + 3 * out_width, out_width, out_height, 0, 1,
-                          work + 3 * work_size)) {
+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width,
+                          out_width, out_height, 0, 1, work + 3 * work_size)) {
      return 0;
    }
    p->emit_alpha = EmitRescaledAlphaRGB;
@@ -588,21 +582,21 @@ static int CustomSetup(VP8Io* io) {
 #if !defined(WEBP_REDUCE_SIZE)
    const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
    if (!ok) {
-      return 0;    // memory error
+      return 0;  // memory error
    }
 #else
-    return 0;   // rescaling support not compiled
+    return 0;  // rescaling support not compiled
 #endif
  } else {
    if (is_rgb) {
      WebPInitSamplers();
-      p->emit = EmitSampledRGB;   // default
+      p->emit = EmitSampledRGB;  // default
      if (io->fancy_upsampling) {
 #ifdef FANCY_UPSAMPLING
        const int uv_width = (io->mb_w + 1) >> 1;
        p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
        if (p->memory == NULL) {
-          return 0;   // memory error.
+          return 0;  // memory error.
        }
        p->tmp_y = (uint8_t*)p->memory;
        p->tmp_u = p->tmp_y + io->mb_w;
@@ -616,10 +610,10 @@ static int CustomSetup(VP8Io* io) {
    }
    if (is_alpha) {  // need transparency output
      p->emit_alpha =
-          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
-              EmitAlphaRGBA4444
+          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444)
+              ? EmitAlphaRGBA4444
          : is_rgb ? EmitAlphaRGB
-          : EmitAlphaYUV;
+                   : EmitAlphaYUV;
      if (is_rgb) {
        WebPInitAlphaProcessing();
      }
@@ -661,10 +655,10 @@ static void CustomTeardown(const VP8Io* io) {
 // Main entry point

 void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
-  io->put      = CustomPut;
-  io->setup    = CustomSetup;
+  io->put = CustomPut;
+  io->setup = CustomSetup;
  io->teardown = CustomTeardown;
-  io->opaque   = params;
+  io->opaque = params;
 }

 //------------------------------------------------------------------------------
--- a/src/dec/quant_dec.c
+++ b/src/dec/quant_dec.c
@@ -17,48 +17,30 @@
 #include "src/utils/bit_reader_utils.h"
 #include "src/webp/types.h"

-static WEBP_INLINE int clip(int v, int M) {
-  return v < 0 ? 0 : v > M ? M : v;
-}
+static WEBP_INLINE int clip(int v, int M) { return v < 0 ? 0 : v > M ? M : v; }

 // Paragraph 14.1
 static const uint8_t kDcTable[128] = {
-  4,     5,   6,   7,   8,   9,  10,  10,
-  11,   12,  13,  14,  15,  16,  17,  17,
-  18,   19,  20,  20,  21,  21,  22,  22,
-  23,   23,  24,  25,  25,  26,  27,  28,
-  29,   30,  31,  32,  33,  34,  35,  36,
-  37,   37,  38,  39,  40,  41,  42,  43,
-  44,   45,  46,  46,  47,  48,  49,  50,
-  51,   52,  53,  54,  55,  56,  57,  58,
-  59,   60,  61,  62,  63,  64,  65,  66,
-  67,   68,  69,  70,  71,  72,  73,  74,
-  75,   76,  76,  77,  78,  79,  80,  81,
-  82,   83,  84,  85,  86,  87,  88,  89,
-  91,   93,  95,  96,  98, 100, 101, 102,
-  104, 106, 108, 110, 112, 114, 116, 118,
-  122, 124, 126, 128, 130, 132, 134, 136,
-  138, 140, 143, 145, 148, 151, 154, 157
-};
+    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,
+    17,  18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,
+    27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,
+    41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,
+    55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
+    70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,  82,  83,
+    84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104,
+    106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
+    138, 140, 143, 145, 148, 151, 154, 157};

 static const uint16_t kAcTable[128] = {
-  4,     5,   6,   7,   8,   9,  10,  11,
-  12,   13,  14,  15,  16,  17,  18,  19,
-  20,   21,  22,  23,  24,  25,  26,  27,
-  28,   29,  30,  31,  32,  33,  34,  35,
-  36,   37,  38,  39,  40,  41,  42,  43,
-  44,   45,  46,  47,  48,  49,  50,  51,
-  52,   53,  54,  55,  56,  57,  58,  60,
-  62,   64,  66,  68,  70,  72,  74,  76,
-  78,   80,  82,  84,  86,  88,  90,  92,
-  94,   96,  98, 100, 102, 104, 106, 108,
-  110, 112, 114, 116, 119, 122, 125, 128,
-  131, 134, 137, 140, 143, 146, 149, 152,
-  155, 158, 161, 164, 167, 170, 173, 177,
-  181, 185, 189, 193, 197, 201, 205, 209,
-  213, 217, 221, 225, 229, 234, 239, 245,
-  249, 254, 259, 264, 269, 274, 279, 284
-};
+    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,
+    19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,
+    34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
+    49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,
+    70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,
+    100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
+    137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
+    185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
+    249, 254, 259, 264, 269, 274, 279, 284};

 //------------------------------------------------------------------------------
 // Paragraph 9.6
@@ -66,16 +48,21 @@ static const uint16_t kAcTable[128] = {
 void VP8ParseQuant(VP8Decoder* const dec) {
  VP8BitReader* const br = &dec->br;
  const int base_q0 = VP8GetValue(br, 7, "global-header");
-  const int dqy1_dc = VP8Get(br, "global-header") ?
-       VP8GetSignedValue(br, 4, "global-header") : 0;
-  const int dqy2_dc = VP8Get(br, "global-header") ?
-       VP8GetSignedValue(br, 4, "global-header") : 0;
-  const int dqy2_ac = VP8Get(br, "global-header") ?
-       VP8GetSignedValue(br, 4, "global-header") : 0;
-  const int dquv_dc = VP8Get(br, "global-header") ?
-       VP8GetSignedValue(br, 4, "global-header") : 0;
-  const int dquv_ac = VP8Get(br, "global-header") ?
-       VP8GetSignedValue(br, 4, "global-header") : 0;
+  const int dqy1_dc = VP8Get(br, "global-header")
+                          ? VP8GetSignedValue(br, 4, "global-header")
+                          : 0;
+  const int dqy2_dc = VP8Get(br, "global-header")
+                          ? VP8GetSignedValue(br, 4, "global-header")
+                          : 0;
+  const int dqy2_ac = VP8Get(br, "global-header")
+                          ? VP8GetSignedValue(br, 4, "global-header")
+                          : 0;
+  const int dquv_dc = VP8Get(br, "global-header")
+                          ? VP8GetSignedValue(br, 4, "global-header")
+                          : 0;
+  const int dquv_ac = VP8Get(br, "global-header")
+                          ? VP8GetSignedValue(br, 4, "global-header")
+                          : 0;

  const VP8SegmentHeader* const hdr = &dec->segment_hdr;
  int i;
@@ -98,7 +85,7 @@ void VP8ParseQuant(VP8Decoder* const dec) {
    {
      VP8QuantMatrix* const m = &dec->dqm[i];
      m->y1_mat[0] = kDcTable[clip(q + dqy1_dc, 127)];
-      m->y1_mat[1] = kAcTable[clip(q + 0,       127)];
+      m->y1_mat[1] = kAcTable[clip(q + 0, 127)];

      m->y2_mat[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
      // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
@@ -110,7 +97,7 @@ void VP8ParseQuant(VP8Decoder* const dec) {
      m->uv_mat[0] = kDcTable[clip(q + dquv_dc, 117)];
      m->uv_mat[1] = kAcTable[clip(q + dquv_ac, 127)];

-      m->uv_quant = q + dquv_ac;   // for dithering strength evaluation
+      m->uv_quant = q + dquv_ac;  // for dithering strength evaluation
    }
  }
 }
--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@@ -14,18 +14,17 @@
 #include <string.h>

 #include "src/dec/common_dec.h"
-#include "src/webp/types.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/cpu.h"
 #include "src/utils/bit_reader_inl_utils.h"
 #include "src/utils/bit_reader_utils.h"
+#include "src/webp/types.h"

 #if !defined(USE_GENERIC_TREE)
-#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
-    !defined(__wasm__)
+#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && !defined(__wasm__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
-#define USE_GENERIC_TREE 1   // ALTERNATE_CODE
+#define USE_GENERIC_TREE 1  // ALTERNATE_CODE
 #else
 #define USE_GENERIC_TREE 0
 #endif
@@ -33,269 +32,227 @@

 #if (USE_GENERIC_TREE == 1)
 static const int8_t kYModesIntra4[18] = {
-  -B_DC_PRED, 1,
-    -B_TM_PRED, 2,
-      -B_VE_PRED, 3,
-        4, 6,
-          -B_HE_PRED, 5,
-            -B_RD_PRED, -B_VR_PRED,
-        -B_LD_PRED, 7,
-          -B_VL_PRED, 8,
-            -B_HD_PRED, -B_HU_PRED
-};
+    -B_DC_PRED, 1, -B_TM_PRED, 2, -B_VE_PRED, 3,
+    4,          6, -B_HE_PRED, 5, -B_RD_PRED, -B_VR_PRED,
+    -B_LD_PRED, 7, -B_VL_PRED, 8, -B_HD_PRED, -B_HU_PRED};
 #endif

 //------------------------------------------------------------------------------
 // Default probabilities

 // Paragraph 13.5
-static const uint8_t
-  CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
-    },
-    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
-      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
-      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
-    },
-    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
-      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
-      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
-    },
-    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
-      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
-      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
-    },
-    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
-      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
-      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
-    },
-    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
-      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
-      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
-    },
-    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
-      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
-      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
-    },
-    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
-    }
-  },
-  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
-      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
-      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
-    },
-    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
-      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
-      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
-    },
-    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
-      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
-      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
-    },
-    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
-      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
-      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
-    },
-    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
-      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
-      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
-    },
-    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
-      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
-      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
-    },
-    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
-      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
-      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
-    },
-    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
-      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
-      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
-    }
-  },
-  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
-      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
-      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
-    },
-    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
-      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
-      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
-    },
-    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
-      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
-      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
-    },
-    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
-      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
-      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
-    },
-    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
-      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
-      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
-    },
-    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
-    },
-    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
-      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
-      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
-    },
-    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
-    }
-  },
-  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
-      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
-      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
-    },
-    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
-      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
-      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
-    },
-    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
-      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
-      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
-    },
-    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
-      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
-      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
-    },
-    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
-      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
-      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
-    },
-    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
-      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
-      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
-    },
-    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
-      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
-      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
-    },
-    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
-      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
-    }
-  }
-};
+static const uint8_t CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+    {{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
+      {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
+      {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
+     {
+         {1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
+         {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
+         {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
+     },
+     {
+         {1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
+         {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
+         {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
+     },
+     {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
+      {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
+      {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
+     {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
+      {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
+      {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
+     {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
+      {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
+      {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
+      {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
+      {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
+     {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
+      {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
+      {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
+     {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
+      {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
+      {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
+     {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
+      {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
+      {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
+     {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
+      {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
+      {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
+     {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
+      {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
+      {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
+     {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
+      {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
+      {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
+     {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
+      {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
+    {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
+      {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
+      {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
+     {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
+      {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
+      {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
+     {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
+      {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
+      {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
+     {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
+      {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
+      {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
+     {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
+      {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
+      {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
+      {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
+      {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
+      {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
+     {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
+      {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
+      {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
+     {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
+      {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
+      {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
+     {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
+      {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
+      {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
+     {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
+      {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
+      {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
+     {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
+      {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
+      {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
+     {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
+      {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
+      {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};

 // Paragraph 11.5
 static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
-  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
-    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
-    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
-    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
-    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
-    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
-    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
-    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
-    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
-    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
-  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
-    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
-    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
-    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
-    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
-    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
-    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
-    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
-    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
-    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
-  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
-    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
-    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
-    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
-    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
-    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
-    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
-    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
-    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
-    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
-  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
-    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
-    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
-    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
-    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
-    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
-    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
-    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
-    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
-    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
-  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
-    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
-    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
-    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
-    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
-    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
-    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
-    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
-    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
-    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
-  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
-    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
-    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
-    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
-    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
-    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
-    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
-    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
-    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
-    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
-  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
-    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
-    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
-    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
-    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
-    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
-    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
-    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
-    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
-    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
-  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
-    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
-    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
-    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
-    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
-    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
-    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
-    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
-    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
-    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
-  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
-    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
-    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
-    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
-    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
-    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
-    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
-    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
-    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
-    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
-  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
-    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
-    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
-    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
-    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
-    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
-    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
-    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
-    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
-    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
-};
+    {{231, 120, 48, 89, 115, 113, 120, 152, 112},
+     {152, 179, 64, 126, 170, 118, 46, 70, 95},
+     {175, 69, 143, 80, 85, 82, 72, 155, 103},
+     {56, 58, 10, 171, 218, 189, 17, 13, 152},
+     {114, 26, 17, 163, 44, 195, 21, 10, 173},
+     {121, 24, 80, 195, 26, 62, 44, 64, 85},
+     {144, 71, 10, 38, 171, 213, 144, 34, 26},
+     {170, 46, 55, 19, 136, 160, 33, 206, 71},
+     {63, 20, 8, 114, 114, 208, 12, 9, 226},
+     {81, 40, 11, 96, 182, 84, 29, 16, 36}},
+    {{134, 183, 89, 137, 98, 101, 106, 165, 148},
+     {72, 187, 100, 130, 157, 111, 32, 75, 80},
+     {66, 102, 167, 99, 74, 62, 40, 234, 128},
+     {41, 53, 9, 178, 241, 141, 26, 8, 107},
+     {74, 43, 26, 146, 73, 166, 49, 23, 157},
+     {65, 38, 105, 160, 51, 52, 31, 115, 128},
+     {104, 79, 12, 27, 217, 255, 87, 17, 7},
+     {87, 68, 71, 44, 114, 51, 15, 186, 23},
+     {47, 41, 14, 110, 182, 183, 21, 17, 194},
+     {66, 45, 25, 102, 197, 189, 23, 18, 22}},
+    {{88, 88, 147, 150, 42, 46, 45, 196, 205},
+     {43, 97, 183, 117, 85, 38, 35, 179, 61},
+     {39, 53, 200, 87, 26, 21, 43, 232, 171},
+     {56, 34, 51, 104, 114, 102, 29, 93, 77},
+     {39, 28, 85, 171, 58, 165, 90, 98, 64},
+     {34, 22, 116, 206, 23, 34, 43, 166, 73},
+     {107, 54, 32, 26, 51, 1, 81, 43, 31},
+     {68, 25, 106, 22, 64, 171, 36, 225, 114},
+     {34, 19, 21, 102, 132, 188, 16, 76, 124},
+     {62, 18, 78, 95, 85, 57, 50, 48, 51}},
+    {{193, 101, 35, 159, 215, 111, 89, 46, 111},
+     {60, 148, 31, 172, 219, 228, 21, 18, 111},
+     {112, 113, 77, 85, 179, 255, 38, 120, 114},
+     {40, 42, 1, 196, 245, 209, 10, 25, 109},
+     {88, 43, 29, 140, 166, 213, 37, 43, 154},
+     {61, 63, 30, 155, 67, 45, 68, 1, 209},
+     {100, 80, 8, 43, 154, 1, 51, 26, 71},
+     {142, 78, 78, 16, 255, 128, 34, 197, 171},
+     {41, 40, 5, 102, 211, 183, 4, 1, 221},
+     {51, 50, 17, 168, 209, 192, 23, 25, 82}},
+    {{138, 31, 36, 171, 27, 166, 38, 44, 229},
+     {67, 87, 58, 169, 82, 115, 26, 59, 179},
+     {63, 59, 90, 180, 59, 166, 93, 73, 154},
+     {40, 40, 21, 116, 143, 209, 34, 39, 175},
+     {47, 15, 16, 183, 34, 223, 49, 45, 183},
+     {46, 17, 33, 183, 6, 98, 15, 32, 183},
+     {57, 46, 22, 24, 128, 1, 54, 17, 37},
+     {65, 32, 73, 115, 28, 128, 23, 128, 205},
+     {40, 3, 9, 115, 51, 192, 18, 6, 223},
+     {87, 37, 9, 115, 59, 77, 64, 21, 47}},
+    {{104, 55, 44, 218, 9, 54, 53, 130, 226},
+     {64, 90, 70, 205, 40, 41, 23, 26, 57},
+     {54, 57, 112, 184, 5, 41, 38, 166, 213},
+     {30, 34, 26, 133, 152, 116, 10, 32, 134},
+     {39, 19, 53, 221, 26, 114, 32, 73, 255},
+     {31, 9, 65, 234, 2, 15, 1, 118, 73},
+     {75, 32, 12, 51, 192, 255, 160, 43, 51},
+     {88, 31, 35, 67, 102, 85, 55, 186, 85},
+     {56, 21, 23, 111, 59, 205, 45, 37, 192},
+     {55, 38, 70, 124, 73, 102, 1, 34, 98}},
+    {{125, 98, 42, 88, 104, 85, 117, 175, 82},
+     {95, 84, 53, 89, 128, 100, 113, 101, 45},
+     {75, 79, 123, 47, 51, 128, 81, 171, 1},
+     {57, 17, 5, 71, 102, 57, 53, 41, 49},
+     {38, 33, 13, 121, 57, 73, 26, 1, 85},
+     {41, 10, 67, 138, 77, 110, 90, 47, 114},
+     {115, 21, 2, 10, 102, 255, 166, 23, 6},
+     {101, 29, 16, 10, 85, 128, 101, 196, 26},
+     {57, 18, 10, 102, 102, 213, 34, 20, 43},
+     {117, 20, 15, 36, 163, 128, 68, 1, 26}},
+    {{102, 61, 71, 37, 34, 53, 31, 243, 192},
+     {69, 60, 71, 38, 73, 119, 28, 222, 37},
+     {68, 45, 128, 34, 1, 47, 11, 245, 171},
+     {62, 17, 19, 70, 146, 85, 55, 62, 70},
+     {37, 43, 37, 154, 100, 163, 85, 160, 1},
+     {63, 9, 92, 136, 28, 64, 32, 201, 85},
+     {75, 15, 9, 9, 64, 255, 184, 119, 16},
+     {86, 6, 28, 5, 64, 255, 25, 248, 1},
+     {56, 8, 17, 132, 137, 255, 55, 116, 128},
+     {58, 15, 20, 82, 135, 57, 26, 121, 40}},
+    {{164, 50, 31, 137, 154, 133, 25, 35, 218},
+     {51, 103, 44, 131, 131, 123, 31, 6, 158},
+     {86, 40, 64, 135, 148, 224, 45, 183, 128},
+     {22, 26, 17, 131, 240, 154, 14, 1, 209},
+     {45, 16, 21, 91, 64, 222, 7, 1, 197},
+     {56, 21, 39, 155, 60, 138, 23, 102, 213},
+     {83, 12, 13, 54, 192, 255, 68, 47, 28},
+     {85, 26, 85, 85, 128, 128, 32, 146, 171},
+     {18, 11, 7, 63, 144, 171, 4, 4, 246},
+     {35, 27, 10, 146, 174, 171, 12, 26, 128}},
+    {{190, 80, 35, 99, 180, 80, 126, 54, 45},
+     {85, 126, 47, 87, 176, 51, 41, 20, 32},
+     {101, 75, 128, 139, 118, 146, 116, 128, 85},
+     {56, 41, 15, 176, 236, 85, 37, 9, 62},
+     {71, 30, 17, 119, 118, 255, 17, 18, 138},
+     {101, 38, 60, 138, 55, 70, 43, 26, 142},
+     {146, 36, 19, 30, 171, 255, 97, 27, 20},
+     {138, 45, 61, 62, 219, 1, 81, 188, 64},
+     {32, 41, 20, 117, 151, 142, 20, 21, 163},
+     {112, 19, 12, 61, 195, 128, 48, 4, 24}}};

 void VP8ResetProba(VP8Proba* const proba) {
  memset(proba->segments, 255u, sizeof(proba->segments));
  // proba->bands[][] is initialized later
 }

-static void ParseIntraMode(VP8BitReader* const br,
-                           VP8Decoder* const dec, int mb_x) {
+static void ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec,
+                           int mb_x) {
  uint8_t* const top = dec->intra_t + 4 * mb_x;
  uint8_t* const left = dec->intra_l;
  VP8MBData* const block = dec->mb_data + mb_x;
@@ -304,9 +261,10 @@ static void ParseIntraMode(VP8BitReader* const br,
  // to decode more than 1 keyframe.
  if (dec->segment_hdr.update_map) {
    // Hardcoded tree parsing
-    block->segment = !VP8GetBit(br, dec->proba.segments[0], "segments")
-                   ?  VP8GetBit(br, dec->proba.segments[1], "segments")
-                   :  VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
+    block->segment =
+        !VP8GetBit(br, dec->proba.segments[0], "segments")
+            ? VP8GetBit(br, dec->proba.segments[1], "segments")
+            : VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
  } else {
    block->segment = 0;  // default for intra
  }
@@ -316,9 +274,9 @@ static void ParseIntraMode(VP8BitReader* const br,
  if (!block->is_i4x4) {
    // Hardcoded 16x16 intra-mode decision tree.
    const int ymode =
-        VP8GetBit(br, 156, "pred-modes") ?
-            (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
-            (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
+        VP8GetBit(br, 156, "pred-modes")
+            ? (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED)
+            : (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
    block->imodes[0] = ymode;
    memset(top, ymode, 4 * sizeof(*top));
    memset(left, ymode, 4 * sizeof(*left));
@@ -339,18 +297,22 @@ static void ParseIntraMode(VP8BitReader* const br,
        ymode = -i;
 #else
        // Hardcoded tree parsing
-        ymode = !VP8GetBit(br, prob[0], "pred-modes") ? B_DC_PRED :
-                  !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED :
-                    !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED :
-                      !VP8GetBit(br, prob[3], "pred-modes") ?
-                        (!VP8GetBit(br, prob[4], "pred-modes") ? B_HE_PRED :
-                          (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
-                                                                 : B_VR_PRED)) :
-                        (!VP8GetBit(br, prob[6], "pred-modes") ? B_LD_PRED :
-                          (!VP8GetBit(br, prob[7], "pred-modes") ? B_VL_PRED :
-                            (!VP8GetBit(br, prob[8], "pred-modes") ? B_HD_PRED
-                                                                   : B_HU_PRED))
-                        );
+        ymode =
+            !VP8GetBit(br, prob[0], "pred-modes")   ? B_DC_PRED
+            : !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED
+            : !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED
+            : !VP8GetBit(br, prob[3], "pred-modes")
+                ? (!VP8GetBit(br, prob[4], "pred-modes")
+                       ? B_HE_PRED
+                       : (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
+                                                                : B_VR_PRED))
+                : (!VP8GetBit(br, prob[6], "pred-modes")
+                       ? B_LD_PRED
+                       : (!VP8GetBit(br, prob[7], "pred-modes")
+                              ? B_VL_PRED
+                              : (!VP8GetBit(br, prob[8], "pred-modes")
+                                     ? B_HD_PRED
+                                     : B_HU_PRED)));
 #endif  // USE_GENERIC_TREE
        top[x] = ymode;
      }
@@ -360,9 +322,10 @@ static void ParseIntraMode(VP8BitReader* const br,
    }
  }
  // Hardcoded UVMode decision tree
-  block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
-                : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
-                : VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
+  block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv")   ? DC_PRED
+                  : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
+                  : VP8GetBit(br, 183, "pred-modes-uv")  ? TM_PRED
+                                                         : H_PRED;
 }

 int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
@@ -378,145 +341,108 @@ int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {

 static const uint8_t
    CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
-      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    }
-  },
-  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
-      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
-    },
-    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    }
-  },
-  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
-      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
-    },
-    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    }
-  },
-  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
-      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    },
-    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
-      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
-    }
-  }
-};
+        {{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+          {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+          {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
+          {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+        {{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
+          {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
+         {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+        {{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
+          {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
+          {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
+         {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+        {{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
+          {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
+         {{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+          {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+          {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+         {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+          {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};

 // Paragraph 9.9

 static const uint8_t kBands[16 + 1] = {
-  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-  0  // extra entry as sentinel
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+    0  // extra entry as sentinel
 };

 void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
@@ -527,9 +453,9 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
      for (c = 0; c < NUM_CTX; ++c) {
        for (p = 0; p < NUM_PROBAS; ++p) {
          const int v =
-              VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
-                        VP8GetValue(br, 8, "global-header") :
-                        CoeffsProba0[t][b][c][p];
+              VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header")
+                  ? VP8GetValue(br, 8, "global-header")
+                  : CoeffsProba0[t][b][c][p];
          proba->bands[t][b].probas[c][p] = v;
        }
      }
--- a/src/dec/vp8_dec.c
+++ b/src/dec/vp8_dec.c
@@ -11,13 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include "src/dec/vp8_dec.h"
+
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

 #include "src/dec/alphai_dec.h"
 #include "src/dec/common_dec.h"
-#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/vp8li_dec.h"
 #include "src/dec/webpi_dec.h"
@@ -41,8 +42,8 @@ int WebPGetDecoderVersion(void) {
 // Signature and pointer-to-function for GetCoeffs() variants below.

 typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
-                             const VP8BandProbas* const prob[],
-                             int ctx, const quant_t dq, int n, int16_t* out);
+                             const VP8BandProbas* const prob[], int ctx,
+                             const quant_t dq, int n, int16_t* out);
 static volatile GetCoeffsFunc GetCoeffs = NULL;

 static void InitGetCoeffs(void);
@@ -95,8 +96,8 @@ void VP8Delete(VP8Decoder* const dec) {
  }
 }

-int VP8SetError(VP8Decoder* const dec,
-                VP8StatusCode error, const char* const msg) {
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
+                const char* const msg) {
  // VP8_STATUS_SUSPENDED is only meaningful in incremental decoding.
  assert(dec->incremental || error != VP8_STATUS_SUSPENDED);
  // The oldest error reported takes precedence over the new one.
@@ -111,39 +112,39 @@ int VP8SetError(VP8Decoder* const dec,
 //------------------------------------------------------------------------------

 int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
-  return (data_size >= 3 &&
-          data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
+  return (data_size >= 3 && data[0] == 0x9d && data[1] == 0x01 &&
+          data[2] == 0x2a);
 }

 int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
               int* const width, int* const height) {
  if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
-    return 0;         // not enough data
+    return 0;  // not enough data
  }
  // check signature
  if (!VP8CheckSignature(data + 3, data_size - 3)) {
-    return 0;         // Wrong signature.
+    return 0;  // Wrong signature.
  } else {
    const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16);
    const int key_frame = !(bits & 1);
    const int w = ((data[7] << 8) | data[6]) & 0x3fff;
    const int h = ((data[9] << 8) | data[8]) & 0x3fff;

-    if (!key_frame) {   // Not a keyframe.
+    if (!key_frame) {  // Not a keyframe.
      return 0;
    }

    if (((bits >> 1) & 7) > 3) {
-      return 0;         // unknown profile
+      return 0;  // unknown profile
    }
    if (!((bits >> 4) & 1)) {
-      return 0;         // first frame is invisible!
+      return 0;  // first frame is invisible!
    }
    if (((bits >> 5)) >= chunk_size) {  // partition_length
-      return 0;         // inconsistent size information.
+      return 0;                         // inconsistent size information.
    }
    if (w == 0 || h == 0) {
-      return 0;         // We don't support both width and height to be zero.
+      return 0;  // We don't support both width and height to be zero.
    }

    if (width) {
@@ -170,30 +171,34 @@ static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
 }

 // Paragraph 9.3
-static int ParseSegmentHeader(VP8BitReader* br,
-                              VP8SegmentHeader* hdr, VP8Proba* proba) {
+static int ParseSegmentHeader(VP8BitReader* br, VP8SegmentHeader* hdr,
+                              VP8Proba* proba) {
  assert(br != NULL);
  assert(hdr != NULL);
  hdr->use_segment = VP8Get(br, "global-header");
  if (hdr->use_segment) {
    hdr->update_map = VP8Get(br, "global-header");
-    if (VP8Get(br, "global-header")) {   // update data
+    if (VP8Get(br, "global-header")) {  // update data
      int s;
      hdr->absolute_delta = VP8Get(br, "global-header");
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->quantizer[s] = VP8Get(br, "global-header") ?
-            VP8GetSignedValue(br, 7, "global-header") : 0;
+        hdr->quantizer[s] = VP8Get(br, "global-header")
+                                ? VP8GetSignedValue(br, 7, "global-header")
+                                : 0;
      }
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->filter_strength[s] = VP8Get(br, "global-header") ?
-            VP8GetSignedValue(br, 6, "global-header") : 0;
+        hdr->filter_strength[s] =
+            VP8Get(br, "global-header")
+                ? VP8GetSignedValue(br, 6, "global-header")
+                : 0;
      }
    }
    if (hdr->update_map) {
      int s;
      for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
-        proba->segments[s] = VP8Get(br, "global-header") ?
-            VP8GetValue(br, 8, "global-header") : 255u;
+        proba->segments[s] = VP8Get(br, "global-header")
+                                 ? VP8GetValue(br, 8, "global-header")
+                                 : 255u;
      }
    }
  } else {
@@ -211,8 +216,8 @@ static int ParseSegmentHeader(VP8BitReader* br,
 // If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
 // is returned, and this is an unrecoverable error.
 // If the partitions were positioned ok, VP8_STATUS_OK is returned.
-static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
-                                     const uint8_t* buf, size_t size) {
+static VP8StatusCode ParsePartitions(VP8Decoder* const dec, const uint8_t* buf,
+                                     size_t size) {
  VP8BitReader* const br = &dec->br;
  const uint8_t* sz = buf;
  const uint8_t* buf_end = buf + size;
@@ -247,12 +252,12 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
 // Paragraph 9.4
 static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
  VP8FilterHeader* const hdr = &dec->filter_hdr;
-  hdr->simple    = VP8Get(br, "global-header");
-  hdr->level     = VP8GetValue(br, 6, "global-header");
+  hdr->simple = VP8Get(br, "global-header");
+  hdr->level = VP8GetValue(br, 6, "global-header");
  hdr->sharpness = VP8GetValue(br, 3, "global-header");
  hdr->use_lf_delta = VP8Get(br, "global-header");
  if (hdr->use_lf_delta) {
-    if (VP8Get(br, "global-header")) {   // update lf-delta?
+    if (VP8Get(br, "global-header")) {  // update lf-delta?
      int i;
      for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
        if (VP8Get(br, "global-header")) {
@@ -290,8 +295,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  buf = io->data;
  buf_size = io->data_size;
  if (buf_size < 4) {
-    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
-                       "Truncated header.");
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Truncated header.");
  }

  // Paragraph 9.1
@@ -322,11 +326,10 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
                         "cannot parse picture header");
    }
    if (!VP8CheckSignature(buf, buf_size)) {
-      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
-                         "Bad code word");
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Bad code word");
    }
    pic_hdr->width = ((buf[4] << 8) | buf[3]) & 0x3fff;
-    pic_hdr->xscale = buf[4] >> 6;   // ratio: 1, 5/4 5/3 or 2
+    pic_hdr->xscale = buf[4] >> 6;  // ratio: 1, 5/4 5/3 or 2
    pic_hdr->height = ((buf[6] << 8) | buf[5]) & 0x3fff;
    pic_hdr->yscale = buf[6] >> 6;
    buf += 7;
@@ -342,11 +345,11 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
    // So they can be used interchangeably without always testing for
    // 'use_cropping'.
    io->use_cropping = 0;
-    io->crop_top  = 0;
+    io->crop_top = 0;
    io->crop_left = 0;
-    io->crop_right  = io->width;
+    io->crop_right = io->width;
    io->crop_bottom = io->height;
-    io->use_scaling  = 0;
+    io->use_scaling = 0;
    io->scaled_width = io->width;
    io->scaled_height = io->height;

@@ -360,8 +363,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  // Check if we have all the partition #0 available, and initialize dec->br
  // to read this partition (and this partition only).
  if (frm_hdr->partition_length > buf_size) {
-    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
-                       "bad partition length");
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "bad partition length");
  }

  br = &dec->br;
@@ -392,11 +394,10 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {

  // Frame buffer marking
  if (!frm_hdr->key_frame) {
-    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
-                       "Not a key frame.");
+    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Not a key frame.");
  }

-  VP8Get(br, "global-header");   // ignore the value of 'update_proba'
+  VP8Get(br, "global-header");  // ignore the value of 'update_proba'

  VP8ParseProba(br, dec);

@@ -408,15 +409,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 //------------------------------------------------------------------------------
 // Residual decoding (Paragraph 13.2 / 13.3)

-static const uint8_t kCat3[] = { 173, 148, 140, 0 };
-static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
-static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
-static const uint8_t kCat6[] =
-  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
-static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
-static const uint8_t kZigzag[16] = {
-  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
-};
+static const uint8_t kCat3[] = {173, 148, 140, 0};
+static const uint8_t kCat4[] = {176, 155, 140, 135, 0};
+static const uint8_t kCat5[] = {180, 157, 141, 134, 130, 0};
+static const uint8_t kCat6[] = {254, 254, 243, 230, 196, 177,
+                                153, 140, 133, 130, 129, 0};
+static const uint8_t* const kCat3456[] = {kCat3, kCat4, kCat5, kCat6};
+static const uint8_t kZigzag[16] = {0, 1,  4,  8,  5, 2,  3,  6,
+                                    9, 12, 13, 10, 7, 11, 14, 15};

 // See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
@@ -452,18 +452,18 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {

 // Returns the position of the last non-zero coeff plus one
 static int GetCoeffsFast(VP8BitReader* const br,
-                         const VP8BandProbas* const prob[],
-                         int ctx, const quant_t dq, int n, int16_t* out) {
+                         const VP8BandProbas* const prob[], int ctx,
+                         const quant_t dq, int n, int16_t* out) {
  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBit(br, p[0], "coeffs")) {
      return n;  // previous coeff was last non-zero coeff
    }
-    while (!VP8GetBit(br, p[1], "coeffs")) {       // sequence of zero coeffs
+    while (!VP8GetBit(br, p[1], "coeffs")) {  // sequence of zero coeffs
      p = prob[++n]->probas[0];
      if (n == 16) return 16;
    }
-    {        // non zero coeff
+    {  // non zero coeff
      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
      int v;
      if (!VP8GetBit(br, p[2], "coeffs")) {
@@ -482,18 +482,18 @@ static int GetCoeffsFast(VP8BitReader* const br,
 // This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
 // of VP8GetBit() targeting specific platforms.
 static int GetCoeffsAlt(VP8BitReader* const br,
-                        const VP8BandProbas* const prob[],
-                        int ctx, const quant_t dq, int n, int16_t* out) {
+                        const VP8BandProbas* const prob[], int ctx,
+                        const quant_t dq, int n, int16_t* out) {
  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBitAlt(br, p[0], "coeffs")) {
      return n;  // previous coeff was last non-zero coeff
    }
-    while (!VP8GetBitAlt(br, p[1], "coeffs")) {       // sequence of zero coeffs
+    while (!VP8GetBitAlt(br, p[1], "coeffs")) {  // sequence of zero coeffs
      p = prob[++n]->probas[0];
      if (n == 16) return 16;
    }
-    {        // non zero coeff
+    {  // non zero coeff
      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
      int v;
      if (!VP8GetBitAlt(br, p[2], "coeffs")) {
@@ -525,10 +525,10 @@ static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
  return nz_coeffs;
 }

-static int ParseResiduals(VP8Decoder* const dec,
-                          VP8MB* const mb, VP8BitReader* const token_br) {
-  const VP8BandProbas* (* const bands)[16 + 1] = dec->proba.bands_ptr;
-  const VP8BandProbas* const * ac_proba;
+static int ParseResiduals(VP8Decoder* const dec, VP8MB* const mb,
+                          VP8BitReader* const token_br) {
+  const VP8BandProbas*(*const bands)[16 + 1] = dec->proba.bands_ptr;
+  const VP8BandProbas* const* ac_proba;
  VP8MBData* const block = dec->mb_data + dec->mb_x;
  const VP8QuantMatrix* const q = &dec->dqm[block->segment];
  int16_t* dst = block->coeffs;
@@ -541,14 +541,14 @@ static int ParseResiduals(VP8Decoder* const dec,
  int first;

  memset(dst, 0, 384 * sizeof(*dst));
-  if (!block->is_i4x4) {    // parse DC
-    int16_t dc[16] = { 0 };
+  if (!block->is_i4x4) {  // parse DC
+    int16_t dc[16] = {0};
    const int ctx = mb->nz_dc + left_mb->nz_dc;
    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat, 0, dc);
    mb->nz_dc = left_mb->nz_dc = (nz > 0);
-    if (nz > 1) {   // more than just the DC -> perform the full transform
+    if (nz > 1) {  // more than just the DC -> perform the full transform
      VP8TransformWHT(dc, dst);
-    } else {        // only DC is non-zero -> inlined simplified transform
+    } else {  // only DC is non-zero -> inlined simplified transform
      int i;
      const int dc0 = (dc[0] + 3) >> 3;
      for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
@@ -669,7 +669,7 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
                           "Premature end-of-file encountered.");
      }
    }
-    VP8InitScanline(dec);   // Prepare for next scanline
+    VP8InitScanline(dec);  // Prepare for next scanline

    // Reconstruct, filter and emit the row.
    if (!VP8ProcessRow(dec, io)) {
@@ -703,7 +703,7 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {

  // Finish setting up the decoding parameter. Will call io->setup().
  ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
-  if (ok) {   // good to go.
+  if (ok) {  // good to go.
    // Will allocate memory and prepare everything.
    if (ok) ok = VP8InitFrame(dec, io);

--- a/src/dec/vp8_dec.h
+++ b/src/dec/vp8_dec.h
@@ -49,20 +49,20 @@ typedef void (*VP8IoTeardownHook)(const VP8Io* io);

 struct VP8Io {
  // set by VP8GetHeaders()
-  int width, height;         // picture dimensions, in pixels (invariable).
-                             // These are the original, uncropped dimensions.
-                             // The actual area passed to put() is stored
-                             // in mb_w / mb_h fields.
+  int width, height;  // picture dimensions, in pixels (invariable).
+                      // These are the original, uncropped dimensions.
+                      // The actual area passed to put() is stored
+                      // in mb_w / mb_h fields.

  // set before calling put()
  int mb_y;                  // position of the current rows (in pixels)
  int mb_w;                  // number of columns in the sample
  int mb_h;                  // number of rows in the sample
-  const uint8_t* y, *u, *v;  // rows to copy (in yuv420 format)
+  const uint8_t *y, *u, *v;  // rows to copy (in yuv420 format)
  int y_stride;              // row stride for luma
  int uv_stride;             // row stride for chroma

-  void* opaque;              // user data
+  void* opaque;  // user data

  // called when fresh samples are available. Currently, samples are in
  // YUV420 format, and can be up to width x 24 in size (depending on the
@@ -165,8 +165,8 @@ WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
 // can be passed NULL.
 WEBP_EXTERN int VP8GetInfo(
    const uint8_t* data,
-    size_t data_size,    // data available so far
-    size_t chunk_size,   // total data size expected in the chunk
+    size_t data_size,   // data available so far
+    size_t chunk_size,  // total data size expected in the chunk
    int* const width, int* const height);

 // Returns true if the next byte(s) in data is a VP8L signature.
@@ -175,12 +175,13 @@ WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
 // Validates the VP8L data-header and retrieves basic header information viz
 // width, height and alpha. Returns 0 in case of formatting error.
 // width/height/has_alpha can be passed NULL.
-WEBP_EXTERN int VP8LGetInfo(
-    const uint8_t* data, size_t data_size,  // data available so far
-    int* const width, int* const height, int* const has_alpha);
+WEBP_EXTERN int VP8LGetInfo(const uint8_t* data,
+                            size_t data_size,  // data available so far
+                            int* const width, int* const height,
+                            int* const has_alpha);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DEC_VP8_DEC_H_
--- a/src/dec/vp8i_dec.h
+++ b/src/dec/vp8i_dec.h
@@ -14,7 +14,7 @@
 #ifndef WEBP_DEC_VP8I_DEC_H_
 #define WEBP_DEC_VP8I_DEC_H_

-#include <string.h>     // for memcpy()
+#include <string.h>  // for memcpy()

 #include "src/dec/common_dec.h"
 #include "src/dec/vp8_dec.h"
@@ -62,9 +62,9 @@ extern "C" {
 //  '|' = left sample,   '-' = top sample,    '+' = top-left sample
 //  't' = extra top-right sample for 4x4 modes
 #define YUV_SIZE (BPS * 17 + BPS * 9)
-#define Y_OFF    (BPS * 1 + 8)
-#define U_OFF    (Y_OFF + BPS * 16 + BPS)
-#define V_OFF    (U_OFF + 16)
+#define Y_OFF (BPS * 1 + 8)
+#define U_OFF (Y_OFF + BPS * 16 + BPS)
+#define V_OFF (U_OFF + 16)

 // minimal width under which lossy multi-threading is always disabled
 #define MIN_WIDTH_FOR_THREADS 512
@@ -84,15 +84,15 @@ typedef struct {
  uint16_t height;
  uint8_t xscale;
  uint8_t yscale;
-  uint8_t colorspace;   // 0 = YCbCr
+  uint8_t colorspace;  // 0 = YCbCr
  uint8_t clamp_type;
 } VP8PictureHeader;

 // segment features
 typedef struct {
  int use_segment;
-  int update_map;        // whether to update the segment map or not
-  int absolute_delta;    // absolute or delta values for quantizer and filter
+  int update_map;      // whether to update the segment map or not
+  int absolute_delta;  // absolute or delta values for quantizer and filter
  int8_t quantizer[NUM_MB_SEGMENTS];        // quantization changes
  int8_t filter_strength[NUM_MB_SEGMENTS];  // filter strength for segments
 } VP8SegmentHeader;
@@ -100,7 +100,7 @@ typedef struct {
 // probas associated to one of the contexts
 typedef uint8_t VP8ProbaArray[NUM_PROBAS];

-typedef struct {   // all the probas associated to one band
+typedef struct {  // all the probas associated to one band
  VP8ProbaArray probas[NUM_CTX];
 } VP8BandProbas;

@@ -114,9 +114,9 @@ typedef struct {

 // Filter parameters
 typedef struct {
-  int simple;                  // 0=complex, 1=simple
-  int level;                   // [0..63]
-  int sharpness;               // [0..7]
+  int simple;     // 0=complex, 1=simple
+  int level;      // [0..63]
+  int sharpness;  // [0..7]
  int use_lf_delta;
  int ref_lf_delta[NUM_REF_LF_DELTAS];
  int mode_lf_delta[NUM_MODE_LF_DELTAS];
@@ -125,33 +125,33 @@ typedef struct {
 //------------------------------------------------------------------------------
 // Informations about the macroblocks.

-typedef struct {  // filter specs
-  uint8_t f_limit;      // filter limit in [3..189], or 0 if no filtering
-  uint8_t f_ilevel;     // inner limit in [1..63]
-  uint8_t f_inner;      // do inner filtering?
-  uint8_t hev_thresh;   // high edge variance threshold in [0..2]
+typedef struct {       // filter specs
+  uint8_t f_limit;     // filter limit in [3..189], or 0 if no filtering
+  uint8_t f_ilevel;    // inner limit in [1..63]
+  uint8_t f_inner;     // do inner filtering?
+  uint8_t hev_thresh;  // high edge variance threshold in [0..2]
 } VP8FInfo;

 typedef struct {  // Top/Left Contexts used for syntax-parsing
-  uint8_t nz;        // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
-  uint8_t nz_dc;     // non-zero DC coeff (1bit)
+  uint8_t nz;     // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
+  uint8_t nz_dc;  // non-zero DC coeff (1bit)
 } VP8MB;

 // Dequantization matrices
-typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
+typedef int quant_t[2];  // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
 typedef struct {
  quant_t y1_mat, y2_mat, uv_mat;

-  int uv_quant;   // U/V quantizer value
-  int dither;     // dithering amplitude (0 = off, max=255)
+  int uv_quant;  // U/V quantizer value
+  int dither;    // dithering amplitude (0 = off, max=255)
 } VP8QuantMatrix;

 // Data needed to reconstruct a macroblock
 typedef struct {
-  int16_t coeffs[384];   // 384 coeffs = (16+4+4) * 4*4
-  uint8_t is_i4x4;       // true if intra4x4
-  uint8_t imodes[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
-  uint8_t uvmode;        // chroma prediction mode
+  int16_t coeffs[384];  // 384 coeffs = (16+4+4) * 4*4
+  uint8_t is_i4x4;      // true if intra4x4
+  uint8_t imodes[16];   // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode;       // chroma prediction mode
  // bit-wise info about the content of each sub-4x4 blocks (in decoding order).
  // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
  //   code=0 -> no coefficient
@@ -161,7 +161,7 @@ typedef struct {
  // This allows to call specialized transform functions.
  uint32_t non_zero_y;
  uint32_t non_zero_uv;
-  uint8_t dither;      // local dithering strength (deduced from non_zero*)
+  uint8_t dither;  // local dithering strength (deduced from non_zero*)
  uint8_t skip;
  uint8_t segment;
 } VP8MBData;
@@ -186,7 +186,7 @@ typedef struct {

 struct VP8Decoder {
  VP8StatusCode status;
-  int ready;     // true if ready to decode a picture with VP8Decode()
+  int ready;              // true if ready to decode a picture with VP8Decode()
  const char* error_msg;  // set when status is not OK.

  // Main data source
@@ -194,17 +194,17 @@ struct VP8Decoder {
  int incremental;  // if true, incremental decoding is expected

  // headers
-  VP8FrameHeader   frm_hdr;
+  VP8FrameHeader frm_hdr;
  VP8PictureHeader pic_hdr;
-  VP8FilterHeader  filter_hdr;
+  VP8FilterHeader filter_hdr;
  VP8SegmentHeader segment_hdr;

  // Worker
  WebPWorker worker;
-  int mt_method;      // multi-thread method: 0=off, 1=[parse+recon][filter]
-                      // 2=[parse][recon+filter]
-  int cache_id;       // current cache row
-  int num_caches;     // number of cached rows of 16 pixels (1, 2 or 3)
+  int mt_method;   // multi-thread method: 0=off, 1=[parse+recon][filter]
+                   // 2=[parse][recon+filter]
+  int cache_id;    // current cache row
+  int num_caches;  // number of cached rows of 16 pixels (1, 2 or 3)
  VP8ThreadContext thread_ctx;  // Thread context

  // dimension, in macroblock units.
@@ -220,8 +220,8 @@ struct VP8Decoder {
  VP8BitReader parts[MAX_NUM_PARTITIONS];

  // Dithering strength, deduced from decoding options
-  int dither;                // whether to use dithering or not
-  VP8Random dithering_rg;    // random generator for dithering
+  int dither;              // whether to use dithering or not
+  VP8Random dithering_rg;  // random generator for dithering

  // dequantization (one set of DC/AC dequant factor per segment)
  VP8QuantMatrix dqm[NUM_MB_SEGMENTS];
@@ -232,16 +232,16 @@ struct VP8Decoder {
  uint8_t skip_p;

  // Boundary data cache and persistent buffers.
-  uint8_t* intra_t;      // top intra modes values: 4 * mb_w
-  uint8_t  intra_l[4];   // left intra modes values
+  uint8_t* intra_t;    // top intra modes values: 4 * mb_w
+  uint8_t intra_l[4];  // left intra modes values

  VP8TopSamples* yuv_t;  // top y/u/v samples

-  VP8MB* mb_info;        // contextual macroblock info (mb_w + 1)
-  VP8FInfo* f_info;      // filter strength info
-  uint8_t* yuv_b;        // main block for Y/U/V (size = YUV_SIZE)
+  VP8MB* mb_info;    // contextual macroblock info (mb_w + 1)
+  VP8FInfo* f_info;  // filter strength info
+  uint8_t* yuv_b;    // main block for Y/U/V (size = YUV_SIZE)

-  uint8_t* cache_y;      // macroblock row for storing unfiltered samples
+  uint8_t* cache_y;  // macroblock row for storing unfiltered samples
  uint8_t* cache_u;
  uint8_t* cache_v;
  int cache_y_stride;
@@ -252,8 +252,8 @@ struct VP8Decoder {
  size_t mem_size;

  // Per macroblock non-persistent infos.
-  int mb_x, mb_y;        // current position, in macroblock units
-  VP8MBData* mb_data;    // parsed reconstruction data
+  int mb_x, mb_y;      // current position, in macroblock units
+  VP8MBData* mb_data;  // parsed reconstruction data

  // Filtering side-info
  int filter_type;                          // 0=off, 1=simple, 2=complex
@@ -267,15 +267,15 @@ struct VP8Decoder {
  uint8_t* alpha_plane_mem;  // memory allocated for alpha_plane
  uint8_t* alpha_plane;      // output. Persistent, contains the whole data.
  const uint8_t* alpha_prev_line;  // last decoded alpha row (or NULL)
-  int alpha_dithering;       // derived from decoding options (0=off, 100=full)
+  int alpha_dithering;  // derived from decoding options (0=off, 100=full)
 };

 //------------------------------------------------------------------------------
 // internal functions. Not public.

 // in vp8.c
-int VP8SetError(VP8Decoder* const dec,
-                VP8StatusCode error, const char* const msg);
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
+                const char* const msg);

 // in tree.c
 void VP8ResetProba(VP8Proba* const proba);
@@ -299,8 +299,8 @@ WEBP_NODISCARD int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
 // Return the multi-threading method to use (0=off), depending
 // on options and bitstream size. Only for lossy decoding.
 int VP8GetThreadMethod(const WebPDecoderOptions* const options,
-                       const WebPHeaderStructure* const headers,
-                       int width, int height);
+                       const WebPHeaderStructure* const headers, int width,
+                       int height);
 // Initialize dithering post-process if needed.
 void VP8InitDithering(const WebPDecoderOptions* const options,
                      VP8Decoder* const dec);
@@ -314,13 +314,13 @@ WEBP_NODISCARD int VP8DecodeMB(VP8Decoder* const dec,

 // in alpha.c
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
-                                      const VP8Io* const io,
-                                      int row, int num_rows);
+                                      const VP8Io* const io, int row,
+                                      int num_rows);

 //------------------------------------------------------------------------------

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DEC_VP8I_DEC_H_
--- a/src/dec/vp8l_dec.c
+++ b/src/dec/vp8l_dec.c
@@ -33,12 +33,12 @@
 #include "src/webp/format_constants.h"
 #include "src/webp/types.h"

-#define NUM_ARGB_CACHE_ROWS          16
+#define NUM_ARGB_CACHE_ROWS 16

 static const int kCodeLengthLiterals = 16;
 static const int kCodeLengthRepeatCode = 16;
-static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
-static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+static const uint8_t kCodeLengthExtraBits[3] = {2, 3, 7};
+static const uint8_t kCodeLengthRepeatOffsets[3] = {3, 3, 11};

 // -----------------------------------------------------------------------------
 //  Five Huffman codes are used at each meta code:
@@ -47,44 +47,30 @@ static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
 //  3. red,
 //  4. blue, and,
 //  5. distance prefix codes.
-typedef enum {
-  GREEN = 0,
-  RED   = 1,
-  BLUE  = 2,
-  ALPHA = 3,
-  DIST  = 4
-} HuffIndex;
+typedef enum { GREEN = 0, RED = 1, BLUE = 2, ALPHA = 3, DIST = 4 } HuffIndex;

 static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
-  NUM_LITERAL_CODES + NUM_LENGTH_CODES,
-  NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
-  NUM_DISTANCE_CODES
-};
+    NUM_LITERAL_CODES + NUM_LENGTH_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
+    NUM_LITERAL_CODES, NUM_DISTANCE_CODES};

-static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {
-  0, 1, 1, 1, 0
-};
+static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {0, 1, 1, 1, 0};

-#define NUM_CODE_LENGTH_CODES       19
+#define NUM_CODE_LENGTH_CODES 19
 static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
-  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-};
+    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

-#define CODE_TO_PLANE_CODES        120
+#define CODE_TO_PLANE_CODES 120
 static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
-  0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
-  0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
-  0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
-  0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
-  0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
-  0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
-  0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
-  0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
-  0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
-  0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
-  0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
-  0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
-};
+    0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, 0x26, 0x2a,
+    0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, 0x25, 0x2b, 0x48, 0x04,
+    0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45,
+    0x4b, 0x34, 0x3c, 0x03, 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d,
+    0x44, 0x4c, 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+    0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, 0x32, 0x3e,
+    0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, 0x64, 0x6c, 0x42, 0x4e,
+    0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e,
+    0x00, 0x74, 0x7c, 0x41, 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d,
+    0x51, 0x5f, 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70};

 // Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha
 // and distance alphabets are constant (256 for red, blue and alpha, 40 for
@@ -96,19 +82,10 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
 static const uint16_t kTableSize[12] = {
-  FIXED_TABLE_SIZE + 654,
-  FIXED_TABLE_SIZE + 656,
-  FIXED_TABLE_SIZE + 658,
-  FIXED_TABLE_SIZE + 662,
-  FIXED_TABLE_SIZE + 670,
-  FIXED_TABLE_SIZE + 686,
-  FIXED_TABLE_SIZE + 718,
-  FIXED_TABLE_SIZE + 782,
-  FIXED_TABLE_SIZE + 912,
-  FIXED_TABLE_SIZE + 1168,
-  FIXED_TABLE_SIZE + 1680,
-  FIXED_TABLE_SIZE + 2704
-};
+    FIXED_TABLE_SIZE + 654,  FIXED_TABLE_SIZE + 656,  FIXED_TABLE_SIZE + 658,
+    FIXED_TABLE_SIZE + 662,  FIXED_TABLE_SIZE + 670,  FIXED_TABLE_SIZE + 686,
+    FIXED_TABLE_SIZE + 718,  FIXED_TABLE_SIZE + 782,  FIXED_TABLE_SIZE + 912,
+    FIXED_TABLE_SIZE + 1168, FIXED_TABLE_SIZE + 1680, FIXED_TABLE_SIZE + 2704};

 static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
  // The oldest error reported takes precedence over the new one.
@@ -118,22 +95,19 @@ static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
  return 0;
 }

-static int DecodeImageStream(int xsize, int ysize,
-                             int is_level0,
+static int DecodeImageStream(int xsize, int ysize, int is_level0,
                             VP8LDecoder* const dec,
                             uint32_t** const decoded_data);

 //------------------------------------------------------------------------------

 int VP8LCheckSignature(const uint8_t* const data, size_t size) {
-  return (size >= VP8L_FRAME_HEADER_SIZE &&
-          data[0] == VP8L_MAGIC_BYTE &&
+  return (size >= VP8L_FRAME_HEADER_SIZE && data[0] == VP8L_MAGIC_BYTE &&
          (data[4] >> 5) == 0);  // version
 }

-static int ReadImageInfo(VP8LBitReader* const br,
-                         int* const width, int* const height,
-                         int* const has_alpha) {
+static int ReadImageInfo(VP8LBitReader* const br, int* const width,
+                         int* const height, int* const has_alpha) {
  if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0;
  *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
  *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
@@ -142,12 +116,12 @@ static int ReadImageInfo(VP8LBitReader* const br,
  return !br->eos;
 }

-int VP8LGetInfo(const uint8_t* data, size_t data_size,
-                int* const width, int* const height, int* const has_alpha) {
+int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width,
+                int* const height, int* const has_alpha) {
  if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
-    return 0;         // not enough data
+    return 0;  // not enough data
  } else if (!VP8LCheckSignature(data, data_size)) {
-    return 0;         // bad signature
+    return 0;  // bad signature
  } else {
    int w, h, a;
    VP8LBitReader br;
@@ -262,9 +236,9 @@ static void BuildPackedTable(HTreeGroup* const htree_group) {
  }
 }

-static int ReadHuffmanCodeLengths(
-    VP8LDecoder* const dec, const int* const code_length_code_lengths,
-    int num_symbols, int* const code_lengths) {
+static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
+                                  const int* const code_length_code_lengths,
+                                  int num_symbols, int* const code_lengths) {
  int ok = 0;
  VP8LBitReader* const br = &dec->br;
  int symbol;
@@ -278,7 +252,7 @@ static int ReadHuffmanCodeLengths(
    goto End;
  }

-  if (VP8LReadBits(br, 1)) {    // use length
+  if (VP8LReadBits(br, 1)) {  // use length
    const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
    max_symbol = 2 + VP8LReadBits(br, length_nbits);
    if (max_symbol > num_symbols) {
@@ -316,7 +290,7 @@ static int ReadHuffmanCodeLengths(
  }
  ok = 1;

- End:
+End:
  VP8LHuffmanTablesDeallocate(&tables);
  if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
  return ok;
@@ -348,7 +322,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
    ok = 1;
  } else {  // Decode Huffman-coded code lengths.
    int i;
-    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
+    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = {0};
    const int num_codes = VP8LReadBits(br, 4) + 4;
    assert(num_codes <= NUM_CODE_LENGTH_CODES);

@@ -361,8 +335,8 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,

  ok = ok && !br->eos;
  if (ok) {
-    size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
-                                 code_lengths, alphabet_size);
+    size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, code_lengths,
+                                 alphabet_size);
  }
  if (!ok || size == 0) {
    return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
@@ -449,7 +423,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
  hdr->num_htree_groups = num_htree_groups;
  hdr->htree_groups = htree_groups;

- Error:
+Error:
  WebPSafeFree(mapping);
  if (!ok) {
    WebPSafeFree(huffman_image);
@@ -554,7 +528,7 @@ int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
  }
  ok = 1;

- Error:
+Error:
  WebPSafeFree(code_lengths);
  if (!ok) {
    VP8LHuffmanTablesDeallocate(huffman_tables);
@@ -575,7 +549,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
  const int in_height = io->mb_h;
  const int out_height = io->scaled_height;
  const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
-  rescaler_t* work;        // Rescaler work area.
+  rescaler_t* work;  // Rescaler work area.
  const uint64_t scaled_data_size = (uint64_t)out_width;
  uint32_t* scaled_data;  // Temporary storage for scaled BGRA data.
  const uint64_t memory_size = sizeof(*dec->rescaler) +
@@ -595,13 +569,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
  scaled_data = (uint32_t*)memory;

  if (!WebPRescalerInit(dec->rescaler, in_width, in_height,
-                        (uint8_t*)scaled_data, out_width, out_height,
-                        0, num_channels, work)) {
+                        (uint8_t*)scaled_data, out_width, out_height, 0,
+                        num_channels, work)) {
    return 0;
  }
  return 1;
 }
-#endif   // WEBP_REDUCE_SIZE
+#endif  // WEBP_REDUCE_SIZE

 //------------------------------------------------------------------------------
 // Export to ARGB
@@ -626,9 +600,9 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
 }

 // Emit scaled rows.
-static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
-                                uint8_t* in, int in_stride, int mb_h,
-                                uint8_t* const out, int out_stride) {
+static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, uint8_t* in,
+                                int in_stride, int mb_h, uint8_t* const out,
+                                int out_stride) {
  const WEBP_CSP_MODE colorspace = dec->output->colorspace;
  int num_lines_in = 0;
  int num_lines_out = 0;
@@ -639,8 +613,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
    int lines_imported;
    assert(needed_lines > 0 && needed_lines <= lines_left);
-    WebPMultARGBRows(row_in, in_stride,
-                     dec->rescaler->src_width, needed_lines, 0);
+    WebPMultARGBRows(row_in, in_stride, dec->rescaler->src_width, needed_lines,
+                     0);
    lines_imported =
        WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
    assert(lines_imported == needed_lines);
@@ -650,13 +624,12 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
  return num_lines_out;
 }

-#endif   // WEBP_REDUCE_SIZE
+#endif  // WEBP_REDUCE_SIZE

 // Emit rows without any scaling.
-static int EmitRows(WEBP_CSP_MODE colorspace,
-                    const uint8_t* row_in, int in_stride,
-                    int mb_w, int mb_h,
-                    uint8_t* const out, int out_stride) {
+static int EmitRows(WEBP_CSP_MODE colorspace, const uint8_t* row_in,
+                    int in_stride, int mb_w, int mb_h, uint8_t* const out,
+                    int out_stride) {
  int lines = mb_h;
  uint8_t* row_out = out;
  while (lines-- > 0) {
@@ -711,8 +684,8 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
  return num_lines_out;
 }

-static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
-                                uint8_t* in, int in_stride, int mb_h) {
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, uint8_t* in,
+                                int in_stride, int mb_h) {
  int num_lines_in = 0;
  int y_pos = dec->last_out_row;
  while (num_lines_in < mb_h) {
@@ -730,9 +703,8 @@ static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
  return y_pos;
 }

-static int EmitRowsYUVA(const VP8LDecoder* const dec,
-                        const uint8_t* in, int in_stride,
-                        int mb_w, int num_rows) {
+static int EmitRowsYUVA(const VP8LDecoder* const dec, const uint8_t* in,
+                        int in_stride, int mb_w, int num_rows) {
  int y_pos = dec->last_out_row;
  while (num_rows-- > 0) {
    ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output);
@@ -774,8 +746,8 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end,

 //------------------------------------------------------------------------------

-static WEBP_INLINE int GetMetaIndex(
-    const uint32_t* const image, int xsize, int bits, int x, int y) {
+static WEBP_INLINE int GetMetaIndex(const uint32_t* const image, int xsize,
+                                    int bits, int x, int y) {
  if (bits == 0) return 0;
  return image[xsize * (y >> bits) + (x >> bits)];
 }
@@ -793,9 +765,8 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,

 typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);

-static void ApplyInverseTransforms(VP8LDecoder* const dec,
-                                   int start_row, int num_rows,
-                                   const uint32_t* const rows) {
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int start_row,
+                                   int num_rows, const uint32_t* const rows) {
  int n = dec->next_transform;
  const int cache_pixs = dec->width * num_rows;
  const int end_row = start_row + num_rows;
@@ -824,7 +795,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
  // We can't process more than NUM_ARGB_CACHE_ROWS at a time (that's the size
  // of argb_cache), but we currently don't need more than that.
  assert(num_rows <= NUM_ARGB_CACHE_ROWS);
-  if (num_rows > 0) {    // Emit output.
+  if (num_rows > 0) {  // Emit output.
    VP8Io* const io = dec->io;
    uint8_t* rows_data = (uint8_t*)dec->argb_cache;
    const int in_stride = io->width * sizeof(uint32_t);  // in unit of RGBA
@@ -839,18 +810,19 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
            buf->rgba + (ptrdiff_t)dec->last_out_row * buf->stride;
        const int num_rows_out =
 #if !defined(WEBP_REDUCE_SIZE)
-         io->use_scaling ?
-            EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
-                                 rgba, buf->stride) :
+            io->use_scaling ? EmitRescaledRowsRGBA(dec, rows_data, in_stride,
+                                                   io->mb_h, rgba, buf->stride)
+                            :
 #endif  // WEBP_REDUCE_SIZE
-            EmitRows(output->colorspace, rows_data, in_stride,
-                     io->mb_w, io->mb_h, rgba, buf->stride);
+                            EmitRows(output->colorspace, rows_data, in_stride,
+                                     io->mb_w, io->mb_h, rgba, buf->stride);
        // Update 'last_out_row'.
        dec->last_out_row += num_rows_out;
-      } else {                              // convert to YUVA
-        dec->last_out_row = io->use_scaling ?
-            EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
-            EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+      } else {  // convert to YUVA
+        dec->last_out_row =
+            io->use_scaling
+                ? EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h)
+                : EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
      }
      assert(dec->last_out_row <= output->height);
    }
@@ -877,9 +849,8 @@ static int Is8bOptimizable(const VP8LMetadata* const hdr) {
  return 1;
 }

-static void AlphaApplyFilter(ALPHDecoder* const alph_dec,
-                             int first_row, int last_row,
-                             uint8_t* out, int stride) {
+static void AlphaApplyFilter(ALPHDecoder* const alph_dec, int first_row,
+                             int last_row, uint8_t* out, int stride) {
  if (alph_dec->filter != WEBP_FILTER_NONE) {
    int y;
    const uint8_t* prev_line = alph_dec->prev_line;
@@ -897,23 +868,22 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
  // For vertical and gradient filtering, we need to decode the part above the
  // crop_top row, in order to have the correct spatial predictors.
  ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io->opaque;
-  const int top_row =
-      (alph_dec->filter == WEBP_FILTER_NONE ||
-       alph_dec->filter == WEBP_FILTER_HORIZONTAL) ? dec->io->crop_top
-                                                    : dec->last_row;
+  const int top_row = (alph_dec->filter == WEBP_FILTER_NONE ||
+                       alph_dec->filter == WEBP_FILTER_HORIZONTAL)
+                          ? dec->io->crop_top
+                          : dec->last_row;
  const int first_row = (dec->last_row < top_row) ? top_row : dec->last_row;
  assert(last_row <= dec->io->crop_bottom);
  if (last_row > first_row) {
    // Special method for paletted alpha data. We only process the cropped area.
    const int width = dec->io->width;
    uint8_t* out = alph_dec->output + width * first_row;
-    const uint8_t* const in =
-      (uint8_t*)dec->pixels + dec->width * first_row;
+    const uint8_t* const in = (uint8_t*)dec->pixels + dec->width * first_row;
    VP8LTransform* const transform = &dec->transforms[0];
    assert(dec->next_transform == 1);
    assert(transform->type == COLOR_INDEXING_TRANSFORM);
-    VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row,
-                                        in, out);
+    VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row, in,
+                                        out);
    AlphaApplyFilter(alph_dec, first_row, last_row, out, width);
  }
  dec->last_row = dec->last_out_row = last_row;
@@ -959,11 +929,11 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
    switch (dist) {
      case 1:
        pattern = src[0];
-#if defined(__arm__) || defined(_M_ARM)   // arm doesn't like multiply that much
+#if defined(__arm__) || defined(_M_ARM)  // arm doesn't like multiply that much
        pattern |= pattern << 8;
        pattern |= pattern << 16;
 #elif defined(WEBP_USE_MIPS_DSP_R2)
-        __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern));
+        __asm__ volatile("replv.qb %0, %0" : "+r"(pattern));
 #else
        pattern = 0x01010101u * pattern;
 #endif
@@ -977,7 +947,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
 #if defined(__arm__) || defined(_M_ARM)
        pattern |= pattern << 16;
 #elif defined(WEBP_USE_MIPS_DSP_R2)
-        __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern));
+        __asm__ volatile("replv.ph %0, %0" : "+r"(pattern));
 #else
        pattern = 0x00010001u * pattern;
 #endif
@@ -991,7 +961,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
    CopySmallPattern8b(src, dst, length, pattern);
    return;
  }
- Copy:
+Copy:
  if (dist >= length) {  // no overlap -> use memcpy()
    memcpy(dst, src, length * sizeof(*dst));
  } else {
@@ -1001,26 +971,25 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
 }

 // copy pattern of 1 or 2 uint32_t's
-static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src,
-                                            uint32_t* dst,
+static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, uint32_t* dst,
                                            int length, uint64_t pattern) {
  int i;
-  if ((uintptr_t)dst & 4) {           // Align 'dst' to 8-bytes boundary.
+  if ((uintptr_t)dst & 4) {  // Align 'dst' to 8-bytes boundary.
    *dst++ = *src++;
    pattern = (pattern >> 32) | (pattern << 32);
    --length;
  }
  assert(0 == ((uintptr_t)dst & 7));
  for (i = 0; i < (length >> 1); ++i) {
-    ((uint64_t*)dst)[i] = pattern;    // Copy the pattern 8 bytes at a time.
+    ((uint64_t*)dst)[i] = pattern;  // Copy the pattern 8 bytes at a time.
  }
-  if (length & 1) {                   // Finish with left-over.
+  if (length & 1) {  // Finish with left-over.
    dst[i << 1] = src[i << 1];
  }
 }

-static WEBP_INLINE void CopyBlock32b(uint32_t* const dst,
-                                     int dist, int length) {
+static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, int dist,
+                                     int length) {
  const uint32_t* const src = dst - dist;
  if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) {
    uint64_t pattern;
@@ -1114,7 +1083,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
  // Process the remaining rows corresponding to last row-block.
  ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);

- End:
+End:
  br->eos = VP8LIsEndOfStream(br);
  if (!ok || (br->eos && pos < end)) {
    return VP8LSetError(
@@ -1297,7 +1266,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
  }
  return 1;

- Error:
+Error:
  return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
 }

@@ -1314,9 +1283,8 @@ static void ClearTransform(VP8LTransform* const transform) {
 static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
  int i;
  const int final_num_colors = 1 << (8 >> transform->bits);
-  uint32_t* const new_color_map =
-      (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
-                                sizeof(*new_color_map));
+  uint32_t* const new_color_map = (uint32_t*)WebPSafeMalloc(
+      (uint64_t)final_num_colors, sizeof(*new_color_map));
  if (new_color_map == NULL) {
    return 0;
  } else {
@@ -1362,31 +1330,30 @@ static int ReadTransform(int* const xsize, int const* ysize,
    case CROSS_COLOR_TRANSFORM:
      transform->bits =
          MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
-      ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize,
-                                               transform->bits),
-                             VP8LSubSampleSize(transform->ysize,
-                                               transform->bits),
-                             /*is_level0=*/0, dec, &transform->data);
+      ok = DecodeImageStream(
+          VP8LSubSampleSize(transform->xsize, transform->bits),
+          VP8LSubSampleSize(transform->ysize, transform->bits),
+          /*is_level0=*/0, dec, &transform->data);
      break;
    case COLOR_INDEXING_TRANSFORM: {
-       const int num_colors = VP8LReadBits(br, 8) + 1;
-       const int bits = (num_colors > 16) ? 0
-                      : (num_colors > 4) ? 1
-                      : (num_colors > 2) ? 2
-                      : 3;
-       *xsize = VP8LSubSampleSize(transform->xsize, bits);
-       transform->bits = bits;
-       ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
-                              &transform->data);
-       if (ok && !ExpandColorMap(num_colors, transform)) {
-         return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-       }
+      const int num_colors = VP8LReadBits(br, 8) + 1;
+      const int bits = (num_colors > 16)  ? 0
+                       : (num_colors > 4) ? 1
+                       : (num_colors > 2) ? 2
+                                          : 3;
+      *xsize = VP8LSubSampleSize(transform->xsize, bits);
+      transform->bits = bits;
+      ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
+                             &transform->data);
+      if (ok && !ExpandColorMap(num_colors, transform)) {
+        return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
+      }
      break;
    }
    case SUBTRACT_GREEN_TRANSFORM:
      break;
    default:
-      assert(0);    // can't happen
+      assert(0);  // can't happen
      break;
  }

@@ -1444,7 +1411,7 @@ static void VP8LClear(VP8LDecoder* const dec) {
  WebPSafeFree(dec->rescaler_memory);
  dec->rescaler_memory = NULL;

-  dec->output = NULL;   // leave no trace behind
+  dec->output = NULL;  // leave no trace behind
 }

 void VP8LDelete(VP8LDecoder* const dec) {
@@ -1464,8 +1431,7 @@ static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
  hdr->huffman_mask = (num_bits == 0) ? ~0 : (1 << num_bits) - 1;
 }

-static int DecodeImageStream(int xsize, int ysize,
-                             int is_level0,
+static int DecodeImageStream(int xsize, int ysize, int is_level0,
                             VP8LDecoder* const dec,
                             uint32_t** const decoded_data) {
  int ok = 1;
@@ -1513,7 +1479,7 @@ static int DecodeImageStream(int xsize, int ysize,
  }
  UpdateDecoder(dec, transform_xsize, transform_ysize);

-  if (is_level0) {   // level 0 complete
+  if (is_level0) {  // level 0 complete
    dec->state = READ_HDR;
    goto End;
  }
@@ -1532,7 +1498,7 @@ static int DecodeImageStream(int xsize, int ysize,
                       transform_ysize, NULL);
  ok = ok && !br->eos;

- End:
+End:
  if (!ok) {
    WebPSafeFree(data);
    ClearMetadata(hdr);
@@ -1566,7 +1532,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
  assert(dec->width <= final_width);
  dec->pixels = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
  if (dec->pixels == NULL) {
-    dec->argb_cache = NULL;    // for soundness
+    dec->argb_cache = NULL;  // for soundness
    return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
  }
  dec->argb_cache = dec->pixels + num_pixels + cache_top_pixels;
@@ -1575,7 +1541,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {

 static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
  const uint64_t total_num_pixels = (uint64_t)dec->width * dec->height;
-  dec->argb_cache = NULL;    // for soundness
+  dec->argb_cache = NULL;  // for soundness
  dec->pixels = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));
  if (dec->pixels == NULL) {
    return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
@@ -1598,14 +1564,14 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
    // Extract alpha (which is stored in the green plane).
    ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io->opaque;
    uint8_t* const output = alph_dec->output;
-    const int width = dec->io->width;      // the final width (!= dec->width)
+    const int width = dec->io->width;  // the final width (!= dec->width)
    const int cache_pixs = width * num_rows_to_process;
    uint8_t* const dst = output + width * cur_row;
    const uint32_t* const src = dec->argb_cache;
    ApplyInverseTransforms(dec, cur_row, num_rows_to_process, in);
    WebPExtractGreen(src, dst, cache_pixs);
-    AlphaApplyFilter(alph_dec,
-                     cur_row, cur_row + num_rows_to_process, dst, width);
+    AlphaApplyFilter(alph_dec, cur_row, cur_row + num_rows_to_process, dst,
+                     width);
    num_rows -= num_rows_to_process;
    in += num_rows_to_process * dec->width;
    cur_row += num_rows_to_process;
@@ -1658,7 +1624,7 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
  alph_dec->vp8l_dec = dec;
  return 1;

- Err:
+Err:
  VP8LDelete(dec);
  return 0;
 }
@@ -1675,11 +1641,11 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
  if (!alph_dec->use_8b_decode) WebPInitAlphaProcessing();

  // Decode (with special row processing).
-  return alph_dec->use_8b_decode ?
-      DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width, dec->height,
-                      last_row) :
-      DecodeImageData(dec, dec->pixels, dec->width, dec->height,
-                      last_row, ExtractAlphaRows);
+  return alph_dec->use_8b_decode
+             ? DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width,
+                               dec->height, last_row)
+             : DecodeImageData(dec, dec->pixels, dec->width, dec->height,
+                               last_row, ExtractAlphaRows);
 }

 //------------------------------------------------------------------------------
@@ -1709,7 +1675,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
  }
  return 1;

- Error:
+Error:
  VP8LClear(dec);
  assert(dec->status != VP8_STATUS_OK);
  return 0;
@@ -1781,7 +1747,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
  params->last_y = dec->last_out_row;
  return 1;

- Err:
+Err:
  VP8LClear(dec);
  assert(dec->status != VP8_STATUS_OK);
  return 0;
--- a/src/dec/vp8li_dec.h
+++ b/src/dec/vp8li_dec.h
@@ -15,7 +15,7 @@
 #ifndef WEBP_DEC_VP8LI_DEC_H_
 #define WEBP_DEC_VP8LI_DEC_H_

-#include <string.h>     // for memcpy()
+#include <string.h>  // for memcpy()

 #include "src/dec/vp8_dec.h"
 #include "src/dec/webpi_dec.h"
@@ -31,69 +31,65 @@
 extern "C" {
 #endif

-typedef enum {
-  READ_DATA = 0,
-  READ_HDR = 1,
-  READ_DIM = 2
-} VP8LDecodeState;
+typedef enum { READ_DATA = 0, READ_HDR = 1, READ_DIM = 2 } VP8LDecodeState;

 typedef struct VP8LTransform VP8LTransform;
 struct VP8LTransform {
-  VP8LImageTransformType type;   // transform type.
-  int                    bits;   // subsampling bits defining transform window.
-  int                    xsize;  // transform window X index.
-  int                    ysize;  // transform window Y index.
-  uint32_t*              data;   // transform data.
+  VP8LImageTransformType type;  // transform type.
+  int bits;                     // subsampling bits defining transform window.
+  int xsize;                    // transform window X index.
+  int ysize;                    // transform window Y index.
+  uint32_t* data;               // transform data.
 };

 typedef struct {
-  int             color_cache_size;
-  VP8LColorCache  color_cache;
-  VP8LColorCache  saved_color_cache;  // for incremental
+  int color_cache_size;
+  VP8LColorCache color_cache;
+  VP8LColorCache saved_color_cache;  // for incremental

-  int             huffman_mask;
-  int             huffman_subsample_bits;
-  int             huffman_xsize;
-  uint32_t*       huffman_image;
-  int             num_htree_groups;
-  HTreeGroup*     htree_groups;
-  HuffmanTables   huffman_tables;
+  int huffman_mask;
+  int huffman_subsample_bits;
+  int huffman_xsize;
+  uint32_t* huffman_image;
+  int num_htree_groups;
+  HTreeGroup* htree_groups;
+  HuffmanTables huffman_tables;
 } VP8LMetadata;

 typedef struct VP8LDecoder VP8LDecoder;
 struct VP8LDecoder {
-  VP8StatusCode    status;
-  VP8LDecodeState  state;
-  VP8Io*           io;
+  VP8StatusCode status;
+  VP8LDecodeState state;
+  VP8Io* io;

-  const WebPDecBuffer* output;    // shortcut to io->opaque->output
+  const WebPDecBuffer* output;  // shortcut to io->opaque->output

-  uint32_t*        pixels;        // Internal data: either uint8_t* for alpha
-                                  // or uint32_t* for BGRA.
-  uint32_t*        argb_cache;    // Scratch buffer for temporary BGRA storage.
+  uint32_t* pixels;      // Internal data: either uint8_t* for alpha
+                         // or uint32_t* for BGRA.
+  uint32_t* argb_cache;  // Scratch buffer for temporary BGRA storage.

-  VP8LBitReader    br;
-  int              incremental;   // if true, incremental decoding is expected
-  VP8LBitReader    saved_br;      // note: could be local variables too
-  int              saved_last_pixel;
+  VP8LBitReader br;
+  int incremental;         // if true, incremental decoding is expected
+  VP8LBitReader saved_br;  // note: could be local variables too
+  int saved_last_pixel;

-  int              width;
-  int              height;
-  int              last_row;      // last input row decoded so far.
-  int              last_pixel;    // last pixel decoded so far. However, it may
-                                  // not be transformed, scaled and
-                                  // color-converted yet.
-  int              last_out_row;  // last row output so far.
+  int width;
+  int height;
+  int last_row;      // last input row decoded so far.
+  int last_pixel;    // last pixel decoded so far. However, it may
+                     // not be transformed, scaled and
+                     // color-converted yet.
+  int last_out_row;  // last row output so far.

-  VP8LMetadata     hdr;
+  VP8LMetadata hdr;

-  int              next_transform;
-  VP8LTransform    transforms[NUM_TRANSFORMS];
+  int next_transform;
+  VP8LTransform transforms[NUM_TRANSFORMS];
  // or'd bitset storing the transforms types.
-  uint32_t         transforms_seen;
+  uint32_t transforms_seen;

-  uint8_t*         rescaler_memory;  // Working memory for rescaling work.
-  WebPRescaler*    rescaler;         // Common rescaler for all channels.
+  uint8_t* rescaler_memory;  // Working memory for rescaling work.
+  WebPRescaler* rescaler;    // Common rescaler for all channels.
 };

 //------------------------------------------------------------------------------
@@ -144,7 +140,7 @@ WEBP_NODISCARD int ReadHuffmanCodesHelper(
 //------------------------------------------------------------------------------

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DEC_VP8LI_DEC_H_
--- a/src/dec/webp_dec.c
+++ b/src/dec/webp_dec.c
@@ -99,8 +99,7 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
 // *height_ptr and *flags_ptr are set to the corresponding values extracted
 // from the VP8X chunk.
 static VP8StatusCode ParseVP8X(const uint8_t** const data,
-                               size_t* const data_size,
-                               int* const found_vp8x,
+                               size_t* const data_size, int* const found_vp8x,
                               int* const width_ptr, int* const height_ptr,
                               uint32_t* const flags_ptr) {
  const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
@@ -173,7 +172,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,

  while (1) {
    uint32_t chunk_size;
-    uint32_t disk_chunk_size;   // chunk_size with padding
+    uint32_t disk_chunk_size;  // chunk_size with padding

    *data = buf;
    *data_size = buf_size;
@@ -184,7 +183,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,

    chunk_size = GetLE32(buf + TAG_SIZE);
    if (chunk_size > MAX_CHUNK_PAYLOAD) {
-      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+      return VP8_STATUS_BITSTREAM_ERROR;  // Not a valid chunk size.
    }
    // For odd-sized chunk-payload, there's one byte padding at the end.
    disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u;
@@ -192,23 +191,22 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,

    // Check that total bytes skipped so far does not exceed riff_size.
    if (riff_size > 0 && (total_size > riff_size)) {
-      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+      return VP8_STATUS_BITSTREAM_ERROR;  // Not a valid chunk size.
    }

    // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
    // parsed all the optional chunks.
    // Note: This check must occur before the check 'buf_size < disk_chunk_size'
    // below to allow incomplete VP8/VP8L chunks.
-    if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
-        !memcmp(buf, "VP8L", TAG_SIZE)) {
+    if (!memcmp(buf, "VP8 ", TAG_SIZE) || !memcmp(buf, "VP8L", TAG_SIZE)) {
      return VP8_STATUS_OK;
    }

-    if (buf_size < disk_chunk_size) {             // Insufficient data.
+    if (buf_size < disk_chunk_size) {  // Insufficient data.
      return VP8_STATUS_NOT_ENOUGH_DATA;
    }

-    if (!memcmp(buf, "ALPH", TAG_SIZE)) {         // A valid ALPH header.
+    if (!memcmp(buf, "ALPH", TAG_SIZE)) {  // A valid ALPH header.
      *alpha_data = buf + CHUNK_HEADER_SIZE;
      *alpha_size = chunk_size;
    }
@@ -282,10 +280,8 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
 // RIFF + VP8X + (optional chunks) + VP8(L)
 // ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
 // VP8(L)     <-- Not a valid WebP format: only allowed for internal purpose.
-static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
-                                          size_t data_size,
-                                          int* const width,
-                                          int* const height,
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data, size_t data_size,
+                                          int* const width, int* const height,
                                          int* const has_alpha,
                                          int* const has_animation,
                                          int* const format,
@@ -312,15 +308,15 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
  // Skip over RIFF header.
  status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
  if (status != VP8_STATUS_OK) {
-    return status;   // Wrong RIFF header / insufficient data.
+    return status;  // Wrong RIFF header / insufficient data.
  }
  found_riff = (hdrs.riff_size > 0);

  // Skip over VP8X.
  {
    uint32_t flags = 0;
-    status = ParseVP8X(&data, &data_size, &found_vp8x,
-                       &canvas_width, &canvas_height, &flags);
+    status = ParseVP8X(&data, &data_size, &found_vp8x, &canvas_width,
+                       &canvas_height, &flags);
    if (status != VP8_STATUS_OK) {
      return status;  // Wrong VP8X / insufficient data.
    }
@@ -332,7 +328,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
    }
    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
    if (has_animation != NULL) *has_animation = animation_present;
-    if (format != NULL) *format = 0;   // default = undefined
+    if (format != NULL) *format = 0;  // default = undefined

    image_width = canvas_width;
    image_height = canvas_height;
@@ -403,7 +399,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
    assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
    assert(headers->offset == headers->data_size - data_size);
  }
- ReturnWidthHeight:
+ReturnWidthHeight:
  if (status == VP8_STATUS_OK ||
      (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
    if (has_alpha != NULL) {
@@ -425,9 +421,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
  int has_animation = 0;
  assert(headers != NULL);
  // fill out headers, ignore width/height/has_alpha.
-  status = ParseHeadersInternal(headers->data, headers->data_size,
-                                NULL, NULL, NULL, &has_animation,
-                                NULL, headers);
+  status = ParseHeadersInternal(headers->data, headers->data_size, NULL, NULL,
+                                NULL, &has_animation, NULL, headers);
  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
    // The WebPDemux API + libwebp can be used to decode individual
    // uncomposited frames or the WebPAnimDecoder can be used to fully
@@ -462,7 +457,7 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
  headers.data = data;
  headers.data_size = data_size;
  headers.have_all_data = 1;
-  status = WebPParseHeaders(&headers);   // Process Pre-VP8 chunks.
+  status = WebPParseHeaders(&headers);  // Process Pre-VP8 chunks.
  if (status != VP8_STATUS_OK) {
    return status;
  }
@@ -485,15 +480,15 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,

    // Decode bitstream header, update io->width/io->height.
    if (!VP8GetHeaders(dec, &io)) {
-      status = dec->status;   // An error occurred. Grab error status.
+      status = dec->status;  // An error occurred. Grab error status.
    } else {
      // Allocate/check output buffers.
      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
                                     params->output);
      if (status == VP8_STATUS_OK) {  // Decode
        // This change must be done before calling VP8Decode()
-        dec->mt_method = VP8GetThreadMethod(params->options, &headers,
-                                            io.width, io.height);
+        dec->mt_method =
+            VP8GetThreadMethod(params->options, &headers, io.width, io.height);
        VP8InitDithering(params->options, dec);
        if (!VP8Decode(dec, &io)) {
          status = dec->status;
@@ -507,7 +502,7 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
      return VP8_STATUS_OUT_OF_MEMORY;
    }
    if (!VP8LDecodeHeader(dec, &io)) {
-      status = dec->status;   // An error occurred. Grab error status.
+      status = dec->status;  // An error occurred. Grab error status.
    } else {
      // Allocate/check output buffers.
      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
@@ -546,10 +541,10 @@ WEBP_NODISCARD static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
  }
  WebPResetDecParams(&params);
  params.output = &buf;
-  buf.colorspace    = colorspace;
-  buf.u.RGBA.rgba   = rgba;
+  buf.colorspace = colorspace;
+  buf.u.RGBA.rgba = rgba;
  buf.u.RGBA.stride = stride;
-  buf.u.RGBA.size   = size;
+  buf.u.RGBA.size = size;
  buf.is_external_memory = 1;
  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
    return NULL;
@@ -582,25 +577,25 @@ uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
  return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
 }

-uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
-                           uint8_t* luma, size_t luma_size, int luma_stride,
-                           uint8_t* u, size_t u_size, int u_stride,
-                           uint8_t* v, size_t v_size, int v_stride) {
+uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size, uint8_t* luma,
+                           size_t luma_size, int luma_stride, uint8_t* u,
+                           size_t u_size, int u_stride, uint8_t* v,
+                           size_t v_size, int v_stride) {
  WebPDecParams params;
  WebPDecBuffer output;
  if (luma == NULL || !WebPInitDecBuffer(&output)) return NULL;
  WebPResetDecParams(&params);
  params.output = &output;
-  output.colorspace      = MODE_YUV;
-  output.u.YUVA.y        = luma;
+  output.colorspace = MODE_YUV;
+  output.u.YUVA.y = luma;
  output.u.YUVA.y_stride = luma_stride;
-  output.u.YUVA.y_size   = luma_size;
-  output.u.YUVA.u        = u;
+  output.u.YUVA.y_size = luma_size;
+  output.u.YUVA.u = u;
  output.u.YUVA.u_stride = u_stride;
-  output.u.YUVA.u_size   = u_size;
-  output.u.YUVA.v        = v;
+  output.u.YUVA.u_size = u_size;
+  output.u.YUVA.v = v;
  output.u.YUVA.v_stride = v_stride;
-  output.u.YUVA.v_size   = v_size;
+  output.u.YUVA.v_size = v_size;
  output.is_external_memory = 1;
  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
    return NULL;
@@ -636,50 +631,50 @@ WEBP_NODISCARD static uint8_t* Decode(WEBP_CSP_MODE mode,
  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
    return NULL;
  }
-  if (keep_info != NULL) {    // keep track of the side-info
+  if (keep_info != NULL) {  // keep track of the side-info
    WebPCopyDecBuffer(&output, keep_info);
  }
  // return decoded samples (don't clear 'output'!)
  return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
 }

-uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
-                       int* width, int* height) {
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, int* width,
+                       int* height) {
  return Decode(MODE_RGB, data, data_size, width, height, NULL);
 }

-uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
-                        int* width, int* height) {
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size, int* width,
+                        int* height) {
  return Decode(MODE_RGBA, data, data_size, width, height, NULL);
 }

-uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
-                        int* width, int* height) {
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size, int* width,
+                        int* height) {
  return Decode(MODE_ARGB, data, data_size, width, height, NULL);
 }

-uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
-                       int* width, int* height) {
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, int* width,
+                       int* height) {
  return Decode(MODE_BGR, data, data_size, width, height, NULL);
 }

-uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
-                        int* width, int* height) {
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, int* width,
+                        int* height) {
  return Decode(MODE_BGRA, data, data_size, width, height, NULL);
 }

-uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
-                       int* width, int* height, uint8_t** u, uint8_t** v,
-                       int* stride, int* uv_stride) {
+uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width,
+                       int* height, uint8_t** u, uint8_t** v, int* stride,
+                       int* uv_stride) {
  // data, width and height are checked by Decode().
  if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
    return NULL;
  }

  {
-    WebPDecBuffer output;   // only to preserve the side-infos
-    uint8_t* const out = Decode(MODE_YUV, data, data_size,
-                                width, height, &output);
+    WebPDecBuffer output;  // only to preserve the side-infos
+    uint8_t* const out =
+        Decode(MODE_YUV, data, data_size, width, height, &output);

    if (out != NULL) {
      const WebPYUVABuffer* const buf = &output.u.YUVA;
@@ -706,17 +701,16 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
  DefaultFeatures(features);

  // Only parse enough of the data to retrieve the features.
-  return ParseHeadersInternal(data, data_size,
-                              &features->width, &features->height,
-                              &features->has_alpha, &features->has_animation,
-                              &features->format, NULL);
+  return ParseHeadersInternal(
+      data, data_size, &features->width, &features->height,
+      &features->has_alpha, &features->has_animation, &features->format, NULL);
 }

 //------------------------------------------------------------------------------
 // WebPGetInfo()

-int WebPGetInfo(const uint8_t* data, size_t data_size,
-                int* width, int* height) {
+int WebPGetInfo(const uint8_t* data, size_t data_size, int* width,
+                int* height) {
  WebPBitstreamFeatures features;

  if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
@@ -724,7 +718,7 @@ int WebPGetInfo(const uint8_t* data, size_t data_size,
  }

  if (width != NULL) {
-    *width  = features.width;
+    *width = features.width;
  }
  if (height != NULL) {
    *height = features.height;
@@ -736,10 +730,9 @@ int WebPGetInfo(const uint8_t* data, size_t data_size,
 //------------------------------------------------------------------------------
 // Advance decoding API

-int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
-                                  int version) {
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, int version) {
  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
-    return 0;   // version mismatch
+    return 0;  // version mismatch
  }
  if (config == NULL) {
    return 0;
@@ -811,7 +804,7 @@ VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
                                      WebPBitstreamFeatures* features,
                                      int version) {
  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
-    return VP8_STATUS_INVALID_PARAM;   // version mismatch
+    return VP8_STATUS_INVALID_PARAM;  // version mismatch
  }
  if (features == NULL) {
    return VP8_STATUS_INVALID_PARAM;
@@ -864,8 +857,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
 //------------------------------------------------------------------------------
 // Cropping and rescaling.

-int WebPCheckCropDimensions(int image_width, int image_height,
-                            int x, int y, int w, int h) {
+int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
+                            int w, int h) {
  return WebPCheckCropDimensionsBasic(x, y, w, h) &&
         !(x >= image_width || w > image_width || w > image_width - x ||
           y >= image_height || h > image_height || h > image_height - y);
@@ -884,7 +877,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
    h = options->crop_height;
    x = options->crop_left;
    y = options->crop_top;
-    if (!WebPIsRGBMode(src_colorspace)) {   // only snap for YUV420
+    if (!WebPIsRGBMode(src_colorspace)) {  // only snap for YUV420
      x &= ~1;
      y &= ~1;
    }
@@ -892,9 +885,9 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
      return 0;  // out of frame boundary error
    }
  }
-  io->crop_left   = x;
-  io->crop_top    = y;
-  io->crop_right  = x + w;
+  io->crop_left = x;
+  io->crop_top = y;
+  io->crop_right = x + w;
  io->crop_bottom = y + h;
  io->mb_w = w;
  io->mb_h = h;
@@ -921,8 +914,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,

  if (io->use_scaling) {
    // disable filter (only for large downscaling ratio).
-    io->bypass_filtering |= (io->scaled_width < W * 3 / 4) &&
-                            (io->scaled_height < H * 3 / 4);
+    io->bypass_filtering |=
+        (io->scaled_width < W * 3 / 4) && (io->scaled_height < H * 3 / 4);
    io->fancy_upsampling = 0;
  }
  return 1;
--- a/src/dec/webpi_dec.h
+++ b/src/dec/webpi_dec.h
@@ -36,15 +36,15 @@ typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
                             int max_out_lines);

 struct WebPDecParams {
-  WebPDecBuffer* output;             // output buffer.
-  uint8_t* tmp_y, *tmp_u, *tmp_v;    // cache for the fancy upsampler
-                                     // or used for tmp rescaling
+  WebPDecBuffer* output;           // output buffer.
+  uint8_t *tmp_y, *tmp_u, *tmp_v;  // cache for the fancy upsampler
+                                   // or used for tmp rescaling

-  int last_y;                 // coordinate of the line that was last output
+  int last_y;  // coordinate of the line that was last output
  const WebPDecoderOptions* options;  // if not NULL, use alt decoding features

-  WebPRescaler* scaler_y, *scaler_u, *scaler_v, *scaler_a;  // rescalers
-  void* memory;                  // overall scratch memory for the output work.
+  WebPRescaler *scaler_y, *scaler_u, *scaler_v, *scaler_a;  // rescalers
+  void* memory;  // overall scratch memory for the output work.

  OutputFunc emit;               // output RGB or YUV samples
  OutputAlphaFunc emit_alpha;    // output alpha channel
@@ -59,15 +59,15 @@ void WebPResetDecParams(WebPDecParams* const params);

 // Structure storing a description of the RIFF headers.
 typedef struct {
-  const uint8_t* data;         // input buffer
-  size_t data_size;            // input buffer size
-  int have_all_data;           // true if all data is known to be available
-  size_t offset;               // offset to main data chunk (VP8 or VP8L)
-  const uint8_t* alpha_data;   // points to alpha chunk (if present)
-  size_t alpha_data_size;      // alpha chunk size
-  size_t compressed_size;      // VP8/VP8L compressed data size
-  size_t riff_size;            // size of the riff payload (or 0 if absent)
-  int is_lossless;             // true if a VP8L chunk is present
+  const uint8_t* data;        // input buffer
+  size_t data_size;           // input buffer size
+  int have_all_data;          // true if all data is known to be available
+  size_t offset;              // offset to main data chunk (VP8 or VP8L)
+  const uint8_t* alpha_data;  // points to alpha chunk (if present)
+  size_t alpha_data_size;     // alpha chunk size
+  size_t compressed_size;     // VP8/VP8L compressed data size
+  size_t riff_size;           // size of the riff payload (or 0 if absent)
+  int is_lossless;            // true if a VP8L chunk is present
 } WebPHeaderStructure;

 // Skips over all valid chunks prior to the first VP8/VP8L frame header.
@@ -82,8 +82,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
 // Misc utils

 // Returns true if crop dimensions are within image bounds.
-int WebPCheckCropDimensions(int image_width, int image_height,
-                            int x, int y, int w, int h);
+int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
+                            int w, int h);

 // Initializes VP8Io with custom setup, io and teardown functions. The default
 // hooks will use the supplied 'params' as io->opaque handle.
@@ -136,7 +136,7 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
 //------------------------------------------------------------------------------

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DEC_WEBPI_DEC_H_
--- a/src/demux/anim_decode.c
+++ b/src/demux/anim_decode.c
@@ -41,19 +41,19 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
                                 int num_pixels);

 struct WebPAnimDecoder {
-  WebPDemuxer* demux;              // Demuxer created from given WebP bitstream.
-  WebPDecoderConfig config;        // Decoder config.
+  WebPDemuxer* demux;        // Demuxer created from given WebP bitstream.
+  WebPDecoderConfig config;  // Decoder config.
  // Note: we use a pointer to a function blending multiple pixels at a time to
  // allow possible inlining of per-pixel blending function.
-  BlendRowFunc blend_func;         // Pointer to the chose blend row function.
-  WebPAnimInfo info;               // Global info about the animation.
-  uint8_t* curr_frame;             // Current canvas (not disposed).
-  uint8_t* prev_frame_disposed;    // Previous canvas (properly disposed).
-  int prev_frame_timestamp;        // Previous frame timestamp (milliseconds).
-  WebPIterator prev_iter;          // Iterator object for previous frame.
-  int prev_frame_was_keyframe;     // True if previous frame was a keyframe.
-  int next_frame;                  // Index of the next frame to be decoded
-                                   // (starting from 1).
+  BlendRowFunc blend_func;       // Pointer to the chose blend row function.
+  WebPAnimInfo info;             // Global info about the animation.
+  uint8_t* curr_frame;           // Current canvas (not disposed).
+  uint8_t* prev_frame_disposed;  // Previous canvas (properly disposed).
+  int prev_frame_timestamp;      // Previous frame timestamp (milliseconds).
+  WebPIterator prev_iter;        // Iterator object for previous frame.
+  int prev_frame_was_keyframe;   // True if previous frame was a keyframe.
+  int next_frame;                // Index of the next frame to be decoded
+                                 // (starting from 1).
 };

 static void DefaultDecoderOptions(WebPAnimDecoderOptions* const dec_options) {
@@ -79,8 +79,8 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
  assert(dec_options != NULL);

  mode = dec_options->color_mode;
-  if (mode != MODE_RGBA && mode != MODE_BGRA &&
-      mode != MODE_rgbA && mode != MODE_bgrA) {
+  if (mode != MODE_RGBA && mode != MODE_BGRA && mode != MODE_rgbA &&
+      mode != MODE_bgrA) {
    return 0;
  }
  dec->blend_func = (mode == MODE_RGBA || mode == MODE_BGRA)
@@ -145,7 +145,7 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
  WebPAnimDecoderReset(dec);
  return dec;

- Error:
+Error:
  WebPAnimDecoderDelete(dec);
  return NULL;
 }
@@ -197,13 +197,13 @@ WEBP_NODISCARD static int CopyCanvas(const uint8_t* src, uint8_t* dst,
 // Returns true if the current frame is a key-frame.
 static int IsKeyFrame(const WebPIterator* const curr,
                      const WebPIterator* const prev,
-                      int prev_frame_was_key_frame,
-                      int canvas_width, int canvas_height) {
+                      int prev_frame_was_key_frame, int canvas_width,
+                      int canvas_height) {
  if (curr->frame_num == 1) {
    return 1;
  } else if ((!curr->has_alpha || curr->blend_method == WEBP_MUX_NO_BLEND) &&
-             IsFullFrame(curr->width, curr->height,
-                         canvas_width, canvas_height)) {
+             IsFullFrame(curr->width, curr->height, canvas_width,
+                         canvas_height)) {
    return 1;
  } else {
    return (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) &&
@@ -213,12 +213,11 @@ static int IsKeyFrame(const WebPIterator* const curr,
  }
 }

-
 // Blend a single channel of 'src' over 'dst', given their alpha channel values.
 // 'src' and 'dst' are assumed to be NOT pre-multiplied by alpha.
-static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
-                                      uint32_t dst, uint8_t dst_a,
-                                      uint32_t scale, int shift) {
+static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, uint32_t dst,
+                                      uint8_t dst_a, uint32_t scale,
+                                      int shift) {
  const uint8_t src_channel = (src >> shift) & 0xff;
  const uint8_t dst_channel = (dst >> shift) & 0xff;
  const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
@@ -328,8 +327,8 @@ static void FindBlendRangeAtRow(const WebPIterator* const src,
  }
 }

-int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
-                           uint8_t** buf_ptr, int* timestamp_ptr) {
+int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, uint8_t** buf_ptr,
+                           int* timestamp_ptr) {
  WebPIterator iter;
  uint32_t width;
  uint32_t height;
@@ -358,8 +357,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
      goto Error;
    }
  } else {
-    if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame,
-                    width, height)) {
+    if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame, width, height)) {
      goto Error;
    }
  }
@@ -394,8 +392,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
      int y;
      // Blend transparent pixels with pixels in previous canvas.
      for (y = 0; y < iter.height; ++y) {
-        const size_t offset =
-            (iter.y_offset + y) * width + iter.x_offset;
+        const size_t offset = (iter.y_offset + y) * width + iter.x_offset;
        blend_row((uint32_t*)dec->curr_frame + offset,
                  (uint32_t*)dec->prev_frame_disposed + offset, iter.width);
      }
@@ -445,7 +442,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
  *timestamp_ptr = timestamp;
  return 1;

- Error:
+Error:
  WebPDemuxReleaseIterator(&iter);
  return 0;
 }
--- a/src/demux/demux.c
+++ b/src/demux/demux.c
@@ -19,7 +19,7 @@
 #include <string.h>

 #include "src/utils/utils.h"
-#include "src/webp/decode.h"     // WebPGetFeatures
+#include "src/webp/decode.h"  // WebPGetFeatures
 #include "src/webp/demux.h"
 #include "src/webp/format_constants.h"
 #include "src/webp/mux.h"
@@ -31,10 +31,10 @@
 #define DMUX_REV_VERSION 0

 typedef struct {
-  size_t start;         // start location of the data
-  size_t end;           // end location
-  size_t riff_end;      // riff chunk end location, can be > end.
-  size_t buf_size;      // size of the buffer
+  size_t start;     // start location of the data
+  size_t end;       // end location
+  size_t riff_end;  // riff chunk end location, can be > end.
+  size_t buf_size;  // size of the buffer
  const uint8_t* buf;
 } MemBuffer;

@@ -51,7 +51,7 @@ typedef struct Frame {
  WebPMuxAnimDispose dispose_method;
  WebPMuxAnimBlend blend_method;
  int frame_num;
-  int complete;   // img_components contains a full image.
+  int complete;                 // img_components contains a full image.
  ChunkData img_components[2];  // 0=VP8{,L} 1=ALPH
  struct Frame* next;
 } Frame;
@@ -76,11 +76,7 @@ struct WebPDemuxer {
  Chunk** chunks_tail;
 };

-typedef enum {
-  PARSE_OK,
-  PARSE_NEED_MORE_DATA,
-  PARSE_ERROR
-} ParseStatus;
+typedef enum { PARSE_OK, PARSE_NEED_MORE_DATA, PARSE_ERROR } ParseStatus;

 typedef struct ChunkParser {
  uint8_t id[4];
@@ -94,10 +90,10 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
 static int IsValidExtendedFormat(const WebPDemuxer* const dmux);

 static const ChunkParser kMasterChunks[] = {
-  { { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
-  { { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
-  { { 'V', 'P', '8', 'X' }, ParseVP8X,        IsValidExtendedFormat },
-  { { '0', '0', '0', '0' }, NULL,             NULL },
+    {{'V', 'P', '8', ' '}, ParseSingleImage, IsValidSimpleFormat},
+    {{'V', 'P', '8', 'L'}, ParseSingleImage, IsValidSimpleFormat},
+    {{'V', 'P', '8', 'X'}, ParseVP8X, IsValidExtendedFormat},
+    {{'0', '0', '0', '0'}, NULL, NULL},
 };

 //------------------------------------------------------------------------------
@@ -109,8 +105,8 @@ int WebPGetDemuxVersion(void) {
 // -----------------------------------------------------------------------------
 // MemBuffer

-static int RemapMemBuffer(MemBuffer* const mem,
-                          const uint8_t* data, size_t size) {
+static int RemapMemBuffer(MemBuffer* const mem, const uint8_t* data,
+                          size_t size) {
  if (size < mem->buf_size) return 0;  // can't remap to a shorter buffer!

  mem->buf = data;
@@ -118,8 +114,8 @@ static int RemapMemBuffer(MemBuffer* const mem,
  return 1;
 }

-static int InitMemBuffer(MemBuffer* const mem,
-                         const uint8_t* data, size_t size) {
+static int InitMemBuffer(MemBuffer* const mem, const uint8_t* data,
+                         size_t size) {
  memset(mem, 0, sizeof(*mem));
  return RemapMemBuffer(mem, data, size);
 }
@@ -195,8 +191,8 @@ static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
  return 1;
 }

-static void SetFrameInfo(size_t start_offset, size_t size,
-                         int frame_num, int complete,
+static void SetFrameInfo(size_t start_offset, size_t size, int frame_num,
+                         int complete,
                         const WebPBitstreamFeatures* const features,
                         Frame* const frame) {
  frame->img_components[0].offset = start_offset;
@@ -214,8 +210,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
                              MemBuffer* const mem, Frame* const frame) {
  int alpha_chunks = 0;
  int image_chunks = 0;
-  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
-              MemDataSize(mem) < min_size);
+  int done =
+      (MemDataSize(mem) < CHUNK_HEADER_SIZE || MemDataSize(mem) < min_size);
  ParseStatus status = PARSE_OK;

  if (done) return PARSE_NEED_MORE_DATA;
@@ -232,7 +228,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,

    payload_size_padded = payload_size + (payload_size & 1);
    payload_available = (payload_size_padded > MemDataSize(mem))
-                      ? MemDataSize(mem) : payload_size_padded;
+                            ? MemDataSize(mem)
+                            : payload_size_padded;
    chunk_size = CHUNK_HEADER_SIZE + payload_available;
    if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
    if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
@@ -258,9 +255,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
          // Extract the bitstream features, tolerating failures when the data
          // is incomplete.
          WebPBitstreamFeatures features;
-          const VP8StatusCode vp8_status =
-              WebPGetFeatures(mem->buf + chunk_start_offset, chunk_size,
-                              &features);
+          const VP8StatusCode vp8_status = WebPGetFeatures(
+              mem->buf + chunk_start_offset, chunk_size, &features);
          if (status == PARSE_NEED_MORE_DATA &&
              vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
            return PARSE_NEED_MORE_DATA;
@@ -276,7 +272,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
          goto Done;
        }
        break;
- Done:
+      Done:
      default:
        // Restore fourcc/size when moving up one level in parsing.
        Rewind(mem, CHUNK_HEADER_SIZE);
@@ -298,12 +294,11 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
 // enough data ('min_size') to parse the payload.
 // Returns PARSE_OK on success with *frame pointing to the new Frame.
 // Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
-static ParseStatus NewFrame(const MemBuffer* const mem,
-                            uint32_t min_size, uint32_t actual_size,
-                            Frame** frame) {
+static ParseStatus NewFrame(const MemBuffer* const mem, uint32_t min_size,
+                            uint32_t actual_size, Frame** frame) {
  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
  if (actual_size < min_size) return PARSE_ERROR;
-  if (MemDataSize(mem) < min_size)  return PARSE_NEED_MORE_DATA;
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;

  *frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(**frame));
  return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
@@ -311,8 +306,8 @@ static ParseStatus NewFrame(const MemBuffer* const mem,

 // Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
 // 'frame_chunk_size' is the previously validated, padded chunk size.
-static ParseStatus ParseAnimationFrame(
-    WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
+static ParseStatus ParseAnimationFrame(WebPDemuxer* const dmux,
+                                       uint32_t frame_chunk_size) {
  const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
  const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
  int added_frame = 0;
@@ -320,15 +315,14 @@ static ParseStatus ParseAnimationFrame(
  MemBuffer* const mem = &dmux->mem;
  Frame* frame;
  size_t start_offset;
-  ParseStatus status =
-      NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
+  ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
  if (status != PARSE_OK) return status;

-  frame->x_offset       = 2 * ReadLE24s(mem);
-  frame->y_offset       = 2 * ReadLE24s(mem);
-  frame->width          = 1 + ReadLE24s(mem);
-  frame->height         = 1 + ReadLE24s(mem);
-  frame->duration       = ReadLE24s(mem);
+  frame->x_offset = 2 * ReadLE24s(mem);
+  frame->y_offset = 2 * ReadLE24s(mem);
+  frame->width = 1 + ReadLE24s(mem);
+  frame->height = 1 + ReadLE24s(mem);
+  frame->duration = ReadLE24s(mem);
  bits = ReadByte(mem);
  frame->dispose_method =
      (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
@@ -362,8 +356,8 @@ static ParseStatus ParseAnimationFrame(
 // the user to request the payload via a fourcc string. 'size' includes the
 // header and the unpadded payload size.
 // Returns true on success, false otherwise.
-static int StoreChunk(WebPDemuxer* const dmux,
-                      size_t start_offset, uint32_t size) {
+static int StoreChunk(WebPDemuxer* const dmux, size_t start_offset,
+                      uint32_t size) {
  Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
  if (chunk == NULL) return 0;

@@ -512,7 +506,7 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
        store_chunk = !!(dmux->feature_flags & XMP_FLAG);
        goto Skip;
      }
- Skip:
+      Skip:
      default: {
        if (chunk_size_padded <= MemDataSize(mem)) {
          if (store_chunk) {
@@ -557,7 +551,7 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {

  dmux->feature_flags = ReadByte(mem);
  Skip(mem, 3);  // Reserved.
-  dmux->canvas_width  = 1 + ReadLE24s(mem);
+  dmux->canvas_width = 1 + ReadLE24s(mem);
  dmux->canvas_height = 1 + ReadLE24s(mem);
  if (dmux->canvas_width * (uint64_t)dmux->canvas_height >= MAX_IMAGE_AREA) {
    return PARSE_ERROR;  // image final dimension is too large
@@ -647,8 +641,8 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
      }

      if (f->width > 0 && f->height > 0 &&
-          !CheckFrameBounds(f, !is_animation,
-                            dmux->canvas_width, dmux->canvas_height)) {
+          !CheckFrameBounds(f, !is_animation, dmux->canvas_width,
+                            dmux->canvas_height)) {
        return 0;
      }
    }
@@ -698,7 +692,7 @@ static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
    *demuxer = dmux;
    return PARSE_OK;

- Error:
+  Error:
    WebPSafeFree(dmux);
    WebPSafeFree(frame);
    return PARSE_ERROR;
@@ -788,12 +782,18 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
  if (dmux == NULL) return 0;

  switch (feature) {
-    case WEBP_FF_FORMAT_FLAGS:     return dmux->feature_flags;
-    case WEBP_FF_CANVAS_WIDTH:     return (uint32_t)dmux->canvas_width;
-    case WEBP_FF_CANVAS_HEIGHT:    return (uint32_t)dmux->canvas_height;
-    case WEBP_FF_LOOP_COUNT:       return (uint32_t)dmux->loop_count;
-    case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor;
-    case WEBP_FF_FRAME_COUNT:      return (uint32_t)dmux->num_frames;
+    case WEBP_FF_FORMAT_FLAGS:
+      return dmux->feature_flags;
+    case WEBP_FF_CANVAS_WIDTH:
+      return (uint32_t)dmux->canvas_width;
+    case WEBP_FF_CANVAS_HEIGHT:
+      return (uint32_t)dmux->canvas_height;
+    case WEBP_FF_LOOP_COUNT:
+      return (uint32_t)dmux->loop_count;
+    case WEBP_FF_BACKGROUND_COLOR:
+      return dmux->bgcolor;
+    case WEBP_FF_FRAME_COUNT:
+      return (uint32_t)dmux->num_frames;
  }
  return 0;
 }
@@ -822,11 +822,11 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
    // if alpha exists it precedes image, update the size allowing for
    // intervening chunks.
    if (alpha->size > 0) {
-      const size_t inter_size = (image->offset > 0)
-                              ? image->offset - (alpha->offset + alpha->size)
+      const size_t inter_size =
+          (image->offset > 0) ? image->offset - (alpha->offset + alpha->size)
                              : 0;
      start_offset = alpha->offset;
-      *data_size  += alpha->size + inter_size;
+      *data_size += alpha->size + inter_size;
    }
    return mem_buf + start_offset;
  }
@@ -835,27 +835,26 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,

 // Create a whole 'frame' from VP8 (+ alpha) or lossless.
 static int SynthesizeFrame(const WebPDemuxer* const dmux,
-                           const Frame* const frame,
-                           WebPIterator* const iter) {
+                           const Frame* const frame, WebPIterator* const iter) {
  const uint8_t* const mem_buf = dmux->mem.buf;
  size_t payload_size = 0;
  const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
  if (payload == NULL) return 0;
  assert(frame != NULL);

-  iter->frame_num      = frame->frame_num;
-  iter->num_frames     = dmux->num_frames;
-  iter->x_offset       = frame->x_offset;
-  iter->y_offset       = frame->y_offset;
-  iter->width          = frame->width;
-  iter->height         = frame->height;
-  iter->has_alpha      = frame->has_alpha;
-  iter->duration       = frame->duration;
+  iter->frame_num = frame->frame_num;
+  iter->num_frames = dmux->num_frames;
+  iter->x_offset = frame->x_offset;
+  iter->y_offset = frame->y_offset;
+  iter->width = frame->width;
+  iter->height = frame->height;
+  iter->has_alpha = frame->has_alpha;
+  iter->duration = frame->duration;
  iter->dispose_method = frame->dispose_method;
-  iter->blend_method   = frame->blend_method;
-  iter->complete       = frame->complete;
+  iter->blend_method = frame->blend_method;
+  iter->complete = frame->complete;
  iter->fragment.bytes = payload;
-  iter->fragment.size  = payload_size;
+  iter->fragment.size = payload_size;
  return 1;
 }

@@ -891,9 +890,7 @@ int WebPDemuxPrevFrame(WebPIterator* iter) {
  return SetFrame(iter->frame_num - 1, iter);
 }

-void WebPDemuxReleaseIterator(WebPIterator* iter) {
-  (void)iter;
-}
+void WebPDemuxReleaseIterator(WebPIterator* iter) { (void)iter; }

 // -----------------------------------------------------------------------------
 // Chunk iteration
@@ -936,17 +933,16 @@ static int SetChunk(const char fourcc[4], int chunk_num,
    const uint8_t* const mem_buf = dmux->mem.buf;
    const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
    iter->chunk.bytes = mem_buf + chunk->data.offset + CHUNK_HEADER_SIZE;
-    iter->chunk.size  = chunk->data.size - CHUNK_HEADER_SIZE;
-    iter->num_chunks  = count;
-    iter->chunk_num   = chunk_num;
+    iter->chunk.size = chunk->data.size - CHUNK_HEADER_SIZE;
+    iter->num_chunks = count;
+    iter->chunk_num = chunk_num;
    return 1;
  }
  return 0;
 }

-int WebPDemuxGetChunk(const WebPDemuxer* dmux,
-                      const char fourcc[4], int chunk_num,
-                      WebPChunkIterator* iter) {
+int WebPDemuxGetChunk(const WebPDemuxer* dmux, const char fourcc[4],
+                      int chunk_num, WebPChunkIterator* iter) {
  if (iter == NULL) return 0;

  memset(iter, 0, sizeof(*iter));
@@ -972,6 +968,4 @@ int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
  return 0;
 }

-void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
-  (void)iter;
-}
+void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) { (void)iter; }
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -20,13 +20,12 @@

 // Tables can be faster on some platform but incur some extra binary size (~2k).
 #if !defined(USE_TABLES_FOR_ALPHA_MULT)
-#define USE_TABLES_FOR_ALPHA_MULT 0   // ALTERNATE_CODE
+#define USE_TABLES_FOR_ALPHA_MULT 0  // ALTERNATE_CODE
 #endif

-
 // -----------------------------------------------------------------------------

-#define MFIX 24    // 24bit fixed-point arithmetic
+#define MFIX 24  // 24bit fixed-point arithmetic
 #define HALF ((1u << MFIX) >> 1)
 #define KINV_255 ((1u << MFIX) / 255u)

@@ -39,95 +38,94 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
 #if (USE_TABLES_FOR_ALPHA_MULT == 1)

 static const uint32_t kMultTables[2][256] = {
-  {    // (255u << MFIX) / alpha
-    0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
-    0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
-    0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
-    0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
-    0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
-    0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
-    0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
-    0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
-    0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
-    0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
-    0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
-    0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
-    0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
-    0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
-    0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
-    0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
-    0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
-    0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
-    0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
-    0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
-    0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
-    0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
-    0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
-    0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
-    0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
-    0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
-    0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
-    0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
-    0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
-    0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
-    0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
-    0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
-    0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
-    0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
-    0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
-    0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
-    0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
-    0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
-    0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
-    0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
-    0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
-    0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
-    0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
-  {   // alpha * KINV_255
-    0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
-    0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
-    0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
-    0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
-    0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
-    0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
-    0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
-    0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
-    0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
-    0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
-    0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
-    0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
-    0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
-    0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
-    0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
-    0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
-    0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
-    0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
-    0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
-    0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
-    0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
-    0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
-    0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
-    0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
-    0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
-    0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
-    0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
-    0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
-    0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
-    0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
-    0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
-    0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
-    0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
-    0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
-    0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
-    0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
-    0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
-    0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
-    0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
-    0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
-    0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
-    0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
-    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
-};
+    // (255u << MFIX) / alpha
+    {0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
+     0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
+     0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
+     0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
+     0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
+     0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
+     0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
+     0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
+     0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
+     0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
+     0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
+     0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
+     0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
+     0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
+     0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
+     0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
+     0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
+     0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
+     0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
+     0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
+     0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
+     0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
+     0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
+     0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
+     0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
+     0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
+     0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
+     0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
+     0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
+     0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
+     0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
+     0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
+     0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
+     0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
+     0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
+     0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
+     0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
+     0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
+     0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
+     0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
+     0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
+     0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
+     0x01030c30, 0x01020612, 0x01010204, 0x01000000},
+    // alpha * KINV_255
+    {0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
+     0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
+     0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
+     0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
+     0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
+     0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
+     0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
+     0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
+     0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
+     0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
+     0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
+     0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
+     0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
+     0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
+     0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
+     0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
+     0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
+     0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
+     0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
+     0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
+     0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
+     0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
+     0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
+     0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
+     0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
+     0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
+     0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
+     0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
+     0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
+     0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
+     0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
+     0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
+     0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
+     0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
+     0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
+     0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
+     0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
+     0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
+     0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
+     0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
+     0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
+     0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
+     0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff}};

 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
  return kMultTables[!inverse][a];
@@ -145,15 +143,15 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t argb = ptr[x];
-    if (argb < 0xff000000u) {      // alpha < 255
-      if (argb <= 0x00ffffffu) {   // alpha == 0
+    if (argb < 0xff000000u) {     // alpha < 255
+      if (argb <= 0x00ffffffu) {  // alpha == 0
        ptr[x] = 0;
      } else {
        const uint32_t alpha = (argb >> 24) & 0xff;
        const uint32_t scale = GetScale(alpha, inverse);
        uint32_t out = argb & 0xff000000u;
-        out |= Mult(argb >>  0, scale) <<  0;
-        out |= Mult(argb >>  8, scale) <<  8;
+        out |= Mult(argb >> 0, scale) << 0;
+        out |= Mult(argb >> 8, scale) << 8;
        out |= Mult(argb >> 16, scale) << 16;
        ptr[x] = out;
      }
@@ -162,8 +160,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
 }

 void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-                   const uint8_t* WEBP_RESTRICT const alpha,
-                   int width, int inverse) {
+                   const uint8_t* WEBP_RESTRICT const alpha, int width,
+                   int inverse) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t a = alpha[x];
@@ -184,8 +182,8 @@ void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,

 void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
 void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-                    const uint8_t* WEBP_RESTRICT const alpha,
-                    int width, int inverse);
+                    const uint8_t* WEBP_RESTRICT const alpha, int width,
+                    int inverse);

 //------------------------------------------------------------------------------
 // Generic per-plane calls
@@ -218,17 +216,17 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
 // (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
 // for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
 // one can use instead: (x * a * 65793 + (1 << 23)) >> 24
-#if 1     // (int)(x * a / 255.)
-#define MULTIPLIER(a)   ((a) * 32897U)
+#if 1  // (int)(x * a / 255.)
+#define MULTIPLIER(a) ((a) * 32897U)
 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
-#else     // (int)(x * a / 255. + .5)
+#else  // (int)(x * a / 255. + .5)
 #define MULTIPLIER(a) ((a) * 65793U)
 #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
 #endif

 #if !WEBP_NEON_OMIT_C_CODE
-static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
-                                 int w, int h, int stride) {
+static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first, int w, int h,
+                                 int stride) {
  while (h-- > 0) {
    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@@ -251,7 +249,7 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,

 // rgbA4444

-#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
+#define MULTIPLIER(a) ((a) * 0x1111)  // 0x1111 ~= (1 << 16) / 15

 static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
  return (x & 0xf0) | (x >> 4);
@@ -265,8 +263,8 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
  return (x * m) >> 16;
 }

-static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
-                                                 int w, int h, int stride,
+static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444, int w,
+                                                 int h, int stride,
                                                 int rg_byte_pos /* 0 or 1 */) {
  while (h-- > 0) {
    int i;
@@ -286,8 +284,8 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
 }
 #undef MULTIPLIER

-static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
-                                     int w, int h, int stride) {
+static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, int w, int h,
+                                     int stride) {
 #if (WEBP_SWAP_16BIT_CSP == 1)
  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
 #else
@@ -297,8 +295,8 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,

 #if !WEBP_NEON_OMIT_C_CODE
 static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
-                           int width, int height,
-                           uint8_t* WEBP_RESTRICT dst, int dst_stride) {
+                           int width, int height, uint8_t* WEBP_RESTRICT dst,
+                           int dst_stride) {
  uint32_t alpha_mask = 0xff;
  int i, j;

@@ -330,8 +328,8 @@ static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha,
 }

 static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-                          int width, int height,
-                          uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
+                          int width, int height, uint8_t* WEBP_RESTRICT alpha,
+                          int alpha_stride) {
  uint8_t alpha_mask = 0xff;
  int i, j;

@@ -357,19 +355,22 @@ static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb,
 //------------------------------------------------------------------------------

 static int HasAlpha8b_C(const uint8_t* src, int length) {
-  while (length-- > 0) if (*src++ != 0xff) return 1;
+  while (length-- > 0)
+    if (*src++ != 0xff) return 1;
  return 0;
 }

 static int HasAlpha32b_C(const uint8_t* src, int length) {
  int x;
-  for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
+  for (x = 0; length-- > 0; x += 4)
+    if (src[x] != 0xff) return 1;
  return 0;
 }

 static void AlphaReplace_C(uint32_t* src, int length, uint32_t color) {
  int x;
-  for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color;
+  for (x = 0; x < length; ++x)
+    if ((src[x] >> 24) == 0) src[x] = color;
 }

 //------------------------------------------------------------------------------
@@ -383,8 +384,8 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
 static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
                       const uint8_t* WEBP_RESTRICT r,
                       const uint8_t* WEBP_RESTRICT g,
-                       const uint8_t* WEBP_RESTRICT b,
-                       int len, uint32_t* WEBP_RESTRICT out) {
+                       const uint8_t* WEBP_RESTRICT b, int len,
+                       uint32_t* WEBP_RESTRICT out) {
  int i;
  for (i = 0; i < len; ++i) {
    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
@@ -394,8 +395,8 @@ static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,

 static void PackRGB_C(const uint8_t* WEBP_RESTRICT r,
                      const uint8_t* WEBP_RESTRICT g,
-                      const uint8_t* WEBP_RESTRICT b,
-                      int len, int step, uint32_t* WEBP_RESTRICT out) {
+                      const uint8_t* WEBP_RESTRICT b, int len, int step,
+                      uint32_t* WEBP_RESTRICT out) {
  int i, offset = 0;
  for (i = 0; i < len; ++i) {
    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
@@ -419,8 +420,8 @@ void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
 #endif
 void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
                    const uint8_t* WEBP_RESTRICT g,
-                    const uint8_t* WEBP_RESTRICT b,
-                    int len, int step, uint32_t* WEBP_RESTRICT out);
+                    const uint8_t* WEBP_RESTRICT b, int len, int step,
+                    uint32_t* WEBP_RESTRICT out);

 int (*WebPHasAlpha8b)(const uint8_t* src, int length);
 int (*WebPHasAlpha32b)(const uint8_t* src, int length);
--- a/src/dsp/alpha_processing_mips_dsp_r2.c
+++ b/src/dsp/alpha_processing_mips_dsp_r2.c
@@ -17,8 +17,8 @@
 #if defined(WEBP_USE_MIPS_DSP_R2)

 static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
-                                   int width, int height,
-                                   uint8_t* dst, int dst_stride) {
+                                   int width, int height, uint8_t* dst,
+                                   int dst_stride) {
  uint32_t alpha_mask = 0xffffffff;
  int i, j, temp0;

@@ -28,97 +28,92 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
    for (i = 0; i < (width >> 2); ++i) {
      int temp1, temp2, temp3;

-      __asm__ volatile (
-        "ulw    %[temp0],      0(%[palpha])                \n\t"
-        "addiu  %[palpha],     %[palpha],     4            \n\t"
-        "addiu  %[pdst],       %[pdst],       16           \n\t"
-        "srl    %[temp1],      %[temp0],      8            \n\t"
-        "srl    %[temp2],      %[temp0],      16           \n\t"
-        "srl    %[temp3],      %[temp0],      24           \n\t"
-        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
-        "sb     %[temp0],      -16(%[pdst])                \n\t"
-        "sb     %[temp1],      -12(%[pdst])                \n\t"
-        "sb     %[temp2],      -8(%[pdst])                 \n\t"
-        "sb     %[temp3],      -4(%[pdst])                 \n\t"
-        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-          [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
-          [alpha_mask]"+r"(alpha_mask)
-        :
-        : "memory"
-      );
+      __asm__ volatile(
+          "ulw    %[temp0],      0(%[palpha])                \n\t"
+          "addiu  %[palpha],     %[palpha],     4            \n\t"
+          "addiu  %[pdst],       %[pdst],       16           \n\t"
+          "srl    %[temp1],      %[temp0],      8            \n\t"
+          "srl    %[temp2],      %[temp0],      16           \n\t"
+          "srl    %[temp3],      %[temp0],      24           \n\t"
+          "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+          "sb     %[temp0],      -16(%[pdst])                \n\t"
+          "sb     %[temp1],      -12(%[pdst])                \n\t"
+          "sb     %[temp2],      -8(%[pdst])                 \n\t"
+          "sb     %[temp3],      -4(%[pdst])                 \n\t"
+          : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+            [temp3] "=&r"(temp3), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
+            [alpha_mask] "+r"(alpha_mask)
+          :
+          : "memory");
    }

    for (i = 0; i < (width & 3); ++i) {
-      __asm__ volatile (
-        "lbu    %[temp0],      0(%[palpha])                \n\t"
-        "addiu  %[palpha],     %[palpha],     1            \n\t"
-        "sb     %[temp0],      0(%[pdst])                  \n\t"
-        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
-        "addiu  %[pdst],       %[pdst],       4            \n\t"
-        : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
-          [alpha_mask]"+r"(alpha_mask)
-        :
-        : "memory"
-      );
+      __asm__ volatile(
+          "lbu    %[temp0],      0(%[palpha])                \n\t"
+          "addiu  %[palpha],     %[palpha],     1            \n\t"
+          "sb     %[temp0],      0(%[pdst])                  \n\t"
+          "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+          "addiu  %[pdst],       %[pdst],       4            \n\t"
+          : [temp0] "=&r"(temp0), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
+            [alpha_mask] "+r"(alpha_mask)
+          :
+          : "memory");
    }
    alpha += alpha_stride;
    dst += dst_stride;
  }

-  __asm__ volatile (
-    "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
-    "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
-    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
-    "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
-    "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
-    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
-    : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
-    :
-  );
+  __asm__ volatile(
+      "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
+      "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
+      "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+      "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
+      "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
+      "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+      : [temp0] "=&r"(temp0), [alpha_mask] "+r"(alpha_mask)
+      :);

  return (alpha_mask != 0xff);
 }

-static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
-                                  int inverse) {
+static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width, int inverse) {
  int x;
  const uint32_t c_00ffffff = 0x00ffffffu;
  const uint32_t c_ff000000 = 0xff000000u;
-  const uint32_t c_8000000  = 0x00800000u;
-  const uint32_t c_8000080  = 0x00800080u;
+  const uint32_t c_8000000 = 0x00800000u;
+  const uint32_t c_8000080 = 0x00800080u;
  for (x = 0; x < width; ++x) {
    const uint32_t argb = ptr[x];
-    if (argb < 0xff000000u) {      // alpha < 255
-      if (argb <= 0x00ffffffu) {   // alpha == 0
+    if (argb < 0xff000000u) {     // alpha < 255
+      if (argb <= 0x00ffffffu) {  // alpha == 0
        ptr[x] = 0;
      } else {
        int temp0, temp1, temp2, temp3, alpha;
-        __asm__ volatile (
-          "srl          %[alpha],   %[argb],       24                \n\t"
-          "replv.qb     %[temp0],   %[alpha]                         \n\t"
-          "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
-          "beqz         %[inverse], 0f                               \n\t"
-          "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
-          "mflo         %[temp0]                                     \n\t"
-        "0:                                                          \n\t"
-          "andi         %[temp1],   %[argb],       0xff              \n\t"
-          "ext          %[temp2],   %[argb],       8,             8  \n\t"
-          "ext          %[temp3],   %[argb],       16,            8  \n\t"
-          "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
-          "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
-          "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
-          "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
-          "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
-          "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
-          "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
-          "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
-          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-            [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
-          : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
-            [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
-            [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
-          : "memory", "hi", "lo"
-        );
+        __asm__ volatile(
+            "srl          %[alpha],   %[argb],       24                \n\t"
+            "replv.qb     %[temp0],   %[alpha]                         \n\t"
+            "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
+            "beqz         %[inverse], 0f                               \n\t"
+            "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
+            "mflo         %[temp0]                                     \n\t"
+            "0:                                                          \n\t"
+            "andi         %[temp1],   %[argb],       0xff              \n\t"
+            "ext          %[temp2],   %[argb],       8,             8  \n\t"
+            "ext          %[temp3],   %[argb],       16,            8  \n\t"
+            "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
+            "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
+            "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
+            "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
+            "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
+            "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
+            "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
+            "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+              [temp3] "=&r"(temp3), [alpha] "=&r"(alpha)
+            : [inverse] "r"(inverse), [c_00ffffff] "r"(c_00ffffff),
+              [c_8000000] "r"(c_8000000), [c_8000080] "r"(c_8000080),
+              [c_ff000000] "r"(c_ff000000), [argb] "r"(argb)
+            : "memory", "hi", "lo");
        ptr[x] = temp1;
      }
    }
@@ -133,38 +128,37 @@ static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
  const int rest = len & 1;
  const uint32_t* const loop_end = out + len - rest;
  const int step = 4;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
+  __asm__ volatile(
+      "xor          %[offset],   %[offset], %[offset]    \n\t"
+      "beq          %[loop_end], %[out],    0f           \n\t"
+      "2:                                                  \n\t"
+      "lbux         %[temp0],    %[offset](%[a])         \n\t"
+      "lbux         %[temp1],    %[offset](%[r])         \n\t"
+      "lbux         %[temp2],    %[offset](%[g])         \n\t"
+      "lbux         %[temp3],    %[offset](%[b])         \n\t"
+      "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+      "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+      "addiu        %[out],      %[out],    4            \n\t"
+      "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+      "sw           %[temp0],    -4(%[out])              \n\t"
+      "addu         %[offset],   %[offset], %[step]      \n\t"
+      "bne          %[loop_end], %[out],    2b           \n\t"
+      "0:                                                  \n\t"
+      "beq          %[rest],     $zero,     1f           \n\t"
+      "lbux         %[temp0],    %[offset](%[a])         \n\t"
+      "lbux         %[temp1],    %[offset](%[r])         \n\t"
+      "lbux         %[temp2],    %[offset](%[g])         \n\t"
+      "lbux         %[temp3],    %[offset](%[b])         \n\t"
+      "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+      "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+      "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+      "sw           %[temp0],    0(%[out])               \n\t"
+      "1:                                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [offset] "=&r"(offset), [out] "+&r"(out)
+      : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
+        [loop_end] "r"(loop_end), [rest] "r"(rest)
+      : "memory");
 }
 #endif  // WORDS_BIGENDIAN

@@ -175,36 +169,35 @@ static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
  const int rest = len & 1;
  const int a = 0xff;
  const uint32_t* const loop_end = out + len - rest;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
+  __asm__ volatile(
+      "xor          %[offset],   %[offset], %[offset]    \n\t"
+      "beq          %[loop_end], %[out],    0f           \n\t"
+      "2:                                                  \n\t"
+      "lbux         %[temp0],    %[offset](%[r])         \n\t"
+      "lbux         %[temp1],    %[offset](%[g])         \n\t"
+      "lbux         %[temp2],    %[offset](%[b])         \n\t"
+      "ins          %[temp0],    %[a],      16,     16   \n\t"
+      "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+      "addiu        %[out],      %[out],    4            \n\t"
+      "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+      "sw           %[temp0],    -4(%[out])              \n\t"
+      "addu         %[offset],   %[offset], %[step]      \n\t"
+      "bne          %[loop_end], %[out],    2b           \n\t"
+      "0:                                                  \n\t"
+      "beq          %[rest],     $zero,     1f           \n\t"
+      "lbux         %[temp0],    %[offset](%[r])         \n\t"
+      "lbux         %[temp1],    %[offset](%[g])         \n\t"
+      "lbux         %[temp2],    %[offset](%[b])         \n\t"
+      "ins          %[temp0],    %[a],      16,     16   \n\t"
+      "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+      "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+      "sw           %[temp0],    0(%[out])               \n\t"
+      "1:                                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [offset] "=&r"(offset), [out] "+&r"(out)
+      : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
+        [loop_end] "r"(loop_end), [rest] "r"(rest)
+      : "memory");
 }

 //------------------------------------------------------------------------------
--- a/src/dsp/alpha_processing_neon.c
+++ b/src/dsp/alpha_processing_neon.c
@@ -22,25 +22,26 @@
 #define MULTIPLIER(a) ((a) * 0x8081)
 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23)

-#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do {                        \
-  const uint8x8_t alpha = (V).val[(ALPHA)];                            \
-  const uint16x8_t r1 = vmull_u8((V).val[1], alpha);                   \
-  const uint16x8_t g1 = vmull_u8((V).val[2], alpha);                   \
-  const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha);             \
-  /* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */                      \
-  const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8);                        \
-  const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8);                        \
-  const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8);                        \
-  const uint16x8_t r3 = vaddq_u16(r2, kOne);                           \
-  const uint16x8_t g3 = vaddq_u16(g2, kOne);                           \
-  const uint16x8_t b3 = vaddq_u16(b2, kOne);                           \
-  (V).val[1] = vshrn_n_u16(r3, 8);                                     \
-  (V).val[2] = vshrn_n_u16(g3, 8);                                     \
-  (V).val[(OTHER)] = vshrn_n_u16(b3, 8);                               \
-} while (0)
+#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER)                   \
+  do {                                                       \
+    const uint8x8_t alpha = (V).val[(ALPHA)];                \
+    const uint16x8_t r1 = vmull_u8((V).val[1], alpha);       \
+    const uint16x8_t g1 = vmull_u8((V).val[2], alpha);       \
+    const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
+    /* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */          \
+    const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8);            \
+    const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8);            \
+    const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8);            \
+    const uint16x8_t r3 = vaddq_u16(r2, kOne);               \
+    const uint16x8_t g3 = vaddq_u16(g2, kOne);               \
+    const uint16x8_t b3 = vaddq_u16(b2, kOne);               \
+    (V).val[1] = vshrn_n_u16(r3, 8);                         \
+    (V).val[2] = vshrn_n_u16(g3, 8);                         \
+    (V).val[(OTHER)] = vshrn_n_u16(b3, 8);                   \
+  } while (0)

-static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
-                                    int w, int h, int stride) {
+static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, int w,
+                                    int h, int stride) {
  const uint16x8_t kOne = vdupq_n_u16(1u);
  while (h-- > 0) {
    uint32_t* const rgbx = (uint32_t*)rgba;
@@ -118,7 +119,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
                                      uint32_t* WEBP_RESTRICT dst,
                                      int dst_stride) {
  int i, j;
-  uint8x8x4_t greens;   // leave A/R/B channels zero'd.
+  uint8x8x4_t greens;  // leave A/R/B channels zero'd.
  greens.val[0] = vdup_n_u8(0);
  greens.val[2] = vdup_n_u8(0);
  greens.val[3] = vdup_n_u8(0);
--- a/src/dsp/alpha_processing_sse2.c
+++ b/src/dsp/alpha_processing_sse2.c
@@ -16,8 +16,8 @@
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>

-#include "src/webp/types.h"
 #include "src/dsp/cpu.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------

@@ -90,7 +90,7 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
  const __m128i zero = _mm_setzero_si128();
  const int limit = width & ~15;
  for (j = 0; j < height; ++j) {
-    for (i = 0; i < limit; i += 16) {   // process 16 alpha bytes
+    for (i = 0; i < limit; i += 16) {  // process 16 alpha bytes
      const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
      const __m128i a1 = _mm_unpacklo_epi8(zero, a0);  // note the 'zero' first!
      const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
      const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
      const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
-      _mm_storeu_si128((__m128i*)&dst[i +  0], a2_lo);
-      _mm_storeu_si128((__m128i*)&dst[i +  4], a2_hi);
-      _mm_storeu_si128((__m128i*)&dst[i +  8], b2_lo);
+      _mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
+      _mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
+      _mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
      _mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
    }
    for (; i < width; ++i) dst[i] = alpha[i] << 8;
@@ -197,36 +197,37 @@ static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
 //------------------------------------------------------------------------------
 // Non-dither premultiplied modes

-#define MULTIPLIER(a)   ((a) * 0x8081)
+#define MULTIPLIER(a) ((a) * 0x8081)
 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23)

 // We can't use a 'const int' for the SHUFFLE value, because it has to be an
 // immediate in the _mm_shufflexx_epi16() instruction. We really need a macro.
 // We use: v / 255 = (v * 0x8081) >> 23, where v = alpha * {r,g,b} is a 16bit
 // value.
-#define APPLY_ALPHA(RGBX, SHUFFLE) do {                              \
-  const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX));    \
-  const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero);           \
-  const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero);           \
-  const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask);           \
-  const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask);           \
-  const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
-  const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
-  const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
-  const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
-  /* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */                          \
-  const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo);        \
-  const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi);        \
-  const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult);               \
-  const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult);               \
-  const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7);                    \
-  const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7);                    \
-  const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi);                 \
-  _mm_storeu_si128((__m128i*)&(RGBX), A3);                           \
-} while (0)
+#define APPLY_ALPHA(RGBX, SHUFFLE)                                     \
+  do {                                                                 \
+    const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX));    \
+    const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero);           \
+    const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero);           \
+    const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask);           \
+    const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask);           \
+    const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
+    const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
+    const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
+    const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
+    /* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */                          \
+    const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo);        \
+    const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi);        \
+    const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult);               \
+    const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult);               \
+    const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7);                    \
+    const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7);                    \
+    const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi);                 \
+    _mm_storeu_si128((__m128i*)&(RGBX), A3);                           \
+  } while (0)

-static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
-                                    int w, int h, int stride) {
+static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, int w,
+                                    int h, int stride) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i kMult = _mm_set1_epi16((short)0x8081);
  const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
@@ -273,7 +274,8 @@ static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
    const int mask = _mm_movemask_epi8(bits);
    if (mask != 0xffff) return 1;
  }
-  for (; i < length; ++i) if (src[i] != 0xff) return 1;
+  for (; i < length; ++i)
+    if (src[i] != 0xff) return 1;
  return 0;
 }

@@ -284,9 +286,9 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
  // We don't know if we can access the last 3 bytes after the last alpha
  // value 'src[4 * length - 4]' (because we don't know if alpha is the first
  // or the last byte of the quadruplet). Hence the '-3' protection below.
-  length = length * 4 - 3;   // size in bytes
+  length = length * 4 - 3;  // size in bytes
  for (; i + 64 <= length; i += 64) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
    const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
    const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
@@ -296,23 +298,24 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
    const __m128i b3 = _mm_and_si128(a3, alpha_mask);
    const __m128i c0 = _mm_packs_epi32(b0, b1);
    const __m128i c1 = _mm_packs_epi32(b2, b3);
-    const __m128i d  = _mm_packus_epi16(c0, c1);
+    const __m128i d = _mm_packus_epi16(c0, c1);
    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
    const int mask = _mm_movemask_epi8(bits);
    if (mask != 0xffff) return 1;
  }
  for (; i + 32 <= length; i += 32) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
    const __m128i b0 = _mm_and_si128(a0, alpha_mask);
    const __m128i b1 = _mm_and_si128(a1, alpha_mask);
-    const __m128i c  = _mm_packs_epi32(b0, b1);
-    const __m128i d  = _mm_packus_epi16(c, c);
+    const __m128i c = _mm_packs_epi32(b0, b1);
+    const __m128i d = _mm_packus_epi16(c, c);
    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
    const int mask = _mm_movemask_epi8(bits);
    if (mask != 0xffff) return 1;
  }
-  for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
+  for (; i <= length; i += 4)
+    if (src[i] != 0xff) return 1;
  return 0;
 }

@@ -334,7 +337,8 @@ static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
    _mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0));
    _mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1));
  }
-  for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color;
+  for (; i < length; ++i)
+    if ((src[i] >> 24) == 0) src[i] = color;
 }

 // -----------------------------------------------------------------------------
@@ -369,8 +373,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
 }

 static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
-                         const uint8_t* WEBP_RESTRICT const alpha,
-                         int width, int inverse) {
+                         const uint8_t* WEBP_RESTRICT const alpha, int width,
+                         int inverse) {
  int x = 0;
  if (!inverse) {
    const __m128i zero = _mm_setzero_si128();
--- a/src/dsp/alpha_processing_sse41.c
+++ b/src/dsp/alpha_processing_sse41.c
@@ -12,8 +12,8 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include "src/dsp/cpu.h"
-#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
+#include "src/webp/types.h"

 #if defined(WEBP_USE_SSE41)
 #include <emmintrin.h>
@@ -35,14 +35,14 @@ static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
  // 'src[4 * width - 4]', because we don't know if alpha is the first or the
  // last byte of the quadruplet.
  const int limit = (width - 1) & ~15;
-  const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
-                                          -1, -1, -1, -1, 12, 8, 4, 0);
-  const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
-                                          12, 8, 4, 0, -1, -1, -1, -1);
-  const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
-                                          -1, -1, -1, -1, -1, -1, -1, -1);
-  const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
-                                          -1, -1, -1, -1, -1, -1, -1, -1);
+  const __m128i kCstAlpha0 =
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
+  const __m128i kCstAlpha1 =
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1);
+  const __m128i kCstAlpha2 =
+      _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+  const __m128i kCstAlpha3 =
+      _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
  for (j = 0; j < height; ++j) {
    const __m128i* src = (const __m128i*)argb;
    for (i = 0; i < limit; i += 16) {
--- a/src/dsp/common_sse41.h
+++ b/src/dsp/common_sse41.h
@@ -44,12 +44,12 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(

  // Process R.
  {
-    const __m128i shuff0 = _mm_set_epi8(
-        5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
-    const __m128i shuff1 = _mm_set_epi8(
-        -1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
-    const __m128i shuff2 = _mm_set_epi8(
-     -1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
+    const __m128i shuff0 =
+        _mm_set_epi8(5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
+    const __m128i shuff1 = _mm_set_epi8(-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7,
+                                        -1, -1, 6, -1, -1);
+    const __m128i shuff2 = _mm_set_epi8(-1, -1, 15, -1, -1, 14, -1, -1, 13, -1,
+                                        -1, 12, -1, -1, 11, -1);
    WEBP_SSE41_SHUFF(R, in0, in1)
  }

@@ -57,23 +57,23 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(
  {
    // Same as before, just shifted to the left by one and including the right
    // padding.
-    const __m128i shuff0 = _mm_set_epi8(
-        -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
-    const __m128i shuff1 = _mm_set_epi8(
-        10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
-    const __m128i shuff2 = _mm_set_epi8(
-     -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
+    const __m128i shuff0 =
+        _mm_set_epi8(-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
+    const __m128i shuff1 =
+        _mm_set_epi8(10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
+    const __m128i shuff2 = _mm_set_epi8(-1, 15, -1, -1, 14, -1, -1, 13, -1, -1,
+                                        12, -1, -1, 11, -1, -1);
    WEBP_SSE41_SHUFF(G, in2, in3)
  }

  // Process B.
  {
-    const __m128i shuff0 = _mm_set_epi8(
-        -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
-    const __m128i shuff1 = _mm_set_epi8(
-        -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
-    const __m128i shuff2 = _mm_set_epi8(
-      15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
+    const __m128i shuff0 =
+        _mm_set_epi8(-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
+    const __m128i shuff1 =
+        _mm_set_epi8(-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
+    const __m128i shuff2 = _mm_set_epi8(15, -1, -1, 14, -1, -1, 13, -1, -1, 12,
+                                        -1, -1, 11, -1, -1, 10);
    WEBP_SSE41_SHUFF(B, in4, in5)
  }

--- a/src/dsp/cost.c
+++ b/src/dsp/cost.c
@@ -14,42 +14,37 @@
 #include <stdlib.h>

 #include "src/dsp/cpu.h"
-#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Boolean-cost cost table

 const uint16_t VP8EntropyCost[256] = {
-  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
-  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
-   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
-   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
-   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
-   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
-   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
-   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
-   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
-   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
-   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
-   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
-   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
-   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
-   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
-   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
-   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
-   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
-   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
-   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
-    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
-    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
-    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
-    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
-    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
-    10,    9,    7,    6,    4,    3
-};
+    1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, 1178, 1152,
+    1110, 1076, 1061, 1024, 1024, 992,  968,  951,  939,  911,  896,  878,
+    871,  854,  838,  820,  811,  794,  786,  768,  768,  752,  740,  732,
+    720,  709,  704,  690,  683,  672,  666,  655,  647,  640,  631,  622,
+    615,  607,  598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
+    534,  528,  522,  512,  512,  504,  500,  494,  488,  483,  477,  473,
+    467,  461,  458,  452,  448,  443,  438,  434,  427,  424,  419,  415,
+    410,  406,  403,  399,  394,  390,  384,  384,  377,  374,  370,  366,
+    362,  359,  355,  351,  347,  342,  342,  336,  333,  330,  326,  323,
+    320,  316,  312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
+    280,  277,  274,  272,  268,  266,  262,  256,  256,  256,  251,  248,
+    245,  242,  240,  237,  234,  232,  228,  226,  223,  221,  218,  216,
+    214,  211,  208,  205,  203,  201,  198,  196,  192,  191,  188,  187,
+    183,  181,  179,  176,  175,  171,  171,  168,  165,  163,  160,  159,
+    156,  154,  152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
+    131,  128,  128,  125,  123,  121,  119,  117,  115,  113,  111,  110,
+    107,  105,  103,  102,  100,  98,   96,   94,   92,   91,   89,   86,
+    86,   83,   82,   80,   77,   76,   74,   73,   71,   69,   67,   66,
+    64,   63,   61,   59,   57,   55,   54,   52,   51,   49,   47,   46,
+    44,   43,   41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,   10,   9,
+    7,    6,    4,    3};

 //------------------------------------------------------------------------------
 // Level cost tables
@@ -57,270 +52,184 @@ const uint16_t VP8EntropyCost[256] = {
 // fixed costs for coding levels, deduce from the coding tree.
 // This is only the part that doesn't depend on the probability state.
 const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
-     0,  256,  256,  256,  256,  432,  618,  630,
-   731,  640,  640,  828,  901,  948, 1021, 1101,
-  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
-  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
-  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
-  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
-  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
-  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
-  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
-  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
-  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
-  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
-  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
-  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
-  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
-  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
-  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
-  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
-  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
-  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
-  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
-  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
-  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
-  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
-  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
-  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
-  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
-  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
-  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
-  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
-  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
-  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
-  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
-  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
-  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
-  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
-  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
-  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
-  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
-  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
-  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
-  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
-  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
-  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
-  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
-  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
-  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
-  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
-  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
-  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
-  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
-  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
-  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
-  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
-  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
-  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
-  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
-  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
-  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
-  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
-  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
-  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
-  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
-  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
-  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
-  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
-  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
-  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
-  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
-  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
-  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
-  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
-  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
-  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
-  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
-  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
-  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
-  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
-  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
-  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
-  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
-  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
-  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
-  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
-  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
-  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
-  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
-  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
-  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
-  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
-  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
-  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
-  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
-  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
-  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
-  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
-  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
-  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
-  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
-  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
-  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
-  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
-  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
-  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
-  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
-  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
-  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
-  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
-  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
-  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
-  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
-  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
-  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
-  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
-  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
-  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
-  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
-  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
-  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
-  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
-  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
-  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
-  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
-  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
-  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
-  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
-  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
-  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
-  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
-  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
-  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
-  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
-  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
-  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
-  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
-  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
-  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
-  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
-  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
-  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
-  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
-  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
-  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
-  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
-  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
-  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
-  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
-  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
-  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
-  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
-  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
-  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
-  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
-  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
-  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
-  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
-  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
-  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
-  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
-  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
-  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
-  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
-  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
-  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
-  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
-  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
-  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
-  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
-  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
-  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
-  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
-  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
-  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
-  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
-  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
-  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
-  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
-  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
-  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
-  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
-  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
-  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
-  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
-  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
-  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
-  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
-  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
-  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
-};
+    0,    256,  256,  256,  256,  432,  618,  630,  731,  640,  640,  828,
+    901,  948,  1021, 1101, 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
+    1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, 1540, 1570, 1613, 1280,
+    1295, 1317, 1332, 1358, 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
+    1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, 1694, 1716, 1731, 1775,
+    1790, 1812, 1827, 1853, 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
+    1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832, 1838, 1847, 1853, 1878,
+    1884, 1893, 1899, 1910, 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
+    1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, 2065, 2074, 2080, 2100,
+    2106, 2115, 2121, 2132, 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
+    2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, 2289, 2298, 2304, 2168,
+    2174, 2183, 2189, 2200, 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
+    2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351, 2357, 2366, 2372, 2392,
+    2398, 2407, 2413, 2424, 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
+    2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573, 2579, 2588, 2594, 2619,
+    2625, 2634, 2640, 2651, 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
+    2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, 2578, 2587, 2593, 2613,
+    2619, 2628, 2634, 2645, 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
+    2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, 2802, 2811, 2817, 2840,
+    2846, 2855, 2861, 2872, 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
+    2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023, 3029, 3038, 3044, 3064,
+    3070, 3079, 3085, 3096, 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
+    3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086, 3092, 3101, 3107, 3132,
+    3138, 3147, 3153, 3164, 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
+    3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, 3319, 3328, 3334, 3354,
+    3360, 3369, 3375, 3386, 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
+    3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, 3543, 3552, 3558, 2816,
+    2822, 2831, 2837, 2848, 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
+    2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999, 3005, 3014, 3020, 3040,
+    3046, 3055, 3061, 3072, 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
+    3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221, 3227, 3236, 3242, 3267,
+    3273, 3282, 3288, 3299, 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
+    3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, 3295, 3304, 3310, 3330,
+    3336, 3345, 3351, 3362, 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
+    3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, 3519, 3528, 3534, 3557,
+    3563, 3572, 3578, 3589, 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
+    3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740, 3746, 3755, 3761, 3781,
+    3787, 3796, 3802, 3813, 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+    3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, 3740, 3749, 3755, 3780,
+    3786, 3795, 3801, 3812, 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
+    3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, 3967, 3976, 3982, 4002,
+    4008, 4017, 4023, 4034, 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
+    4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, 4191, 4200, 4206, 4070,
+    4076, 4085, 4091, 4102, 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
+    4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253, 4259, 4268, 4274, 4294,
+    4300, 4309, 4315, 4326, 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
+    4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, 4481, 4490, 4496, 4521,
+    4527, 4536, 4542, 4553, 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
+    4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588,
+    3594, 3603, 3609, 3620, 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+    3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815,
+    3821, 3830, 3836, 3847, 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+    3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039,
+    4045, 4054, 4060, 4071, 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+    3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107,
+    4113, 4122, 4128, 4139, 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+    4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329,
+    4335, 4344, 4350, 4361, 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+    4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328,
+    4334, 4343, 4349, 4360, 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+    4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552,
+    4558, 4567, 4573, 4584, 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+    4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779,
+    4785, 4794, 4800, 4811, 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+    4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842,
+    4848, 4857, 4863, 4874, 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+    4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069,
+    5075, 5084, 5090, 5101, 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+    5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293,
+    5299, 5308, 5314, 5325, 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+    4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755,
+    4761, 4770, 4776, 4787, 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+    4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977,
+    4983, 4992, 4998, 5009, 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+    5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045,
+    5051, 5060, 5066, 5077, 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+    5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269,
+    5275, 5284, 5290, 5301, 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+    5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496,
+    5502, 5511, 5517, 5528, 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+    5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490,
+    5496, 5505, 5511, 5522, 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+    5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717,
+    5723, 5732, 5738, 5749, 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+    5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941,
+    5947, 5956, 5962, 5973, 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+    5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009,
+    6015, 6024, 6030, 6041, 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+    6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231,
+    6237, 6246, 6252, 6263, 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+    6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, 6420, 6429, 6435, 3515,
+    3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+    3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, 3704, 3713, 3719, 3739,
+    3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+    3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, 3926, 3935, 3941, 3966,
+    3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+    4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, 3994, 4003, 4009, 4029,
+    4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+    4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, 4218, 4227, 4233, 4256,
+    4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+    4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, 4445, 4454, 4460, 4480,
+    4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+    4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, 4439, 4448, 4454, 4479,
+    4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+    4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, 4666, 4675, 4681, 4701,
+    4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+    4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, 4890, 4899, 4905, 4769,
+    4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+    4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, 4958, 4967, 4973, 4993,
+    4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+    5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, 5180, 5189, 5195, 5220,
+    5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+    5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, 4642, 4651, 4657, 4677,
+    4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+    4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, 4866, 4875, 4881, 4904,
+    4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+    5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, 5093, 5102, 5108, 5128,
+    5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+    5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, 5156, 5165, 5171, 5196,
+    5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+    5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, 5383, 5392, 5398, 5418,
+    5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+    5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, 5607, 5616, 5622, 5417,
+    5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+    5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, 5606, 5615, 5621, 5641,
+    5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+    5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, 5828, 5837, 5843, 5868,
+    5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+    5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, 5896, 5905, 5911, 5931,
+    5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+    6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, 6120, 6129, 6135, 6158,
+    6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+    6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, 6347, 6356, 6362, 6382,
+    6388, 6397, 6403, 6414, 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
+    5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408, 5414, 5423, 5429, 5454,
+    5460, 5469, 5475, 5486, 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
+    5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635, 5641, 5650, 5656, 5676,
+    5682, 5691, 5697, 5708, 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
+    5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, 5865, 5874, 5880, 5744,
+    5750, 5759, 5765, 5776, 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
+    5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, 5933, 5942, 5948, 5968,
+    5974, 5983, 5989, 6000, 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
+    6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149, 6155, 6164, 6170, 6195,
+    6201, 6210, 6216, 6227, 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
+    6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148, 6154, 6163, 6169, 6189,
+    6195, 6204, 6210, 6221, 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
+    6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, 6378, 6387, 6393, 6416,
+    6422, 6431, 6437, 6448, 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
+    6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, 6605, 6614, 6620, 6640,
+    6646, 6655, 6661, 6672, 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
+    6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662, 6668, 6677, 6683, 6708,
+    6714, 6723, 6729, 6740, 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
+    6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, 6895, 6904, 6910, 6930,
+    6936, 6945, 6951, 6962, 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
+    7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, 7119, 7128, 7134, 6392,
+    6398, 6407, 6413, 6424, 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
+    6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, 6581, 6590, 6596, 6616,
+    6622, 6631, 6637, 6648, 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
+    6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797, 6803, 6812, 6818, 6843,
+    6849, 6858, 6864, 6875, 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
+    6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, 6871, 6880, 6886, 6906,
+    6912, 6921, 6927, 6938, 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
+    7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, 7095, 7104, 7110, 7133,
+    7139, 7148, 7154, 7165, 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
+    7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316, 7322, 7331, 7337, 7357,
+    7363, 7372, 7378, 7389, 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
+    7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310, 7316, 7325, 7331, 7356,
+    7362, 7371, 7377, 7388, 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
+    7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, 7543, 7552, 7558, 7578,
+    7584, 7593, 7599, 7610, 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
+    7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761};

 //------------------------------------------------------------------------------
 // Tables for level coding

 const uint8_t VP8EncBands[16 + 1] = {
-  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-  0  // sentinel
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+    0  // sentinel
 };

 //------------------------------------------------------------------------------
--- a/src/dsp/cost_mips32.c
+++ b/src/dsp/cost_mips32.c
@@ -38,48 +38,48 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }

-  __asm__ volatile (
-    ".set      push                                                        \n\t"
-    ".set      noreorder                                                   \n\t"
-    "subu      %[temp1],        %[res_last],        %[n]                   \n\t"
-    "sll       %[temp0],        %[n],               1                      \n\t"
-    "blez      %[temp1],        2f                                         \n\t"
-    " addu     %[res_coeffs],   %[res_coeffs],      %[temp0]               \n\t"
-  "1:                                                                      \n\t"
-    "lh        %[v_reg],        0(%[res_coeffs])                           \n\t"
-    "addiu     %[n],            %[n],               1                      \n\t"
-    "negu      %[temp0],        %[v_reg]                                   \n\t"
-    "slti      %[temp1],        %[v_reg],           0                      \n\t"
-    "movn      %[v_reg],        %[temp0],           %[temp1]               \n\t"
-    "sltiu     %[temp0],        %[v_reg],           2                      \n\t"
-    "move      %[ctx_reg],      %[v_reg]                                   \n\t"
-    "movz      %[ctx_reg],      %[const_2],         %[temp0]               \n\t"
-    "sll       %[temp1],        %[v_reg],           1                      \n\t"
-    "addu      %[temp1],        %[temp1],           %[VP8LevelFixedCosts]  \n\t"
-    "lhu       %[temp1],        0(%[temp1])                                \n\t"
-    "slt       %[temp0],        %[v_reg],           %[const_max_level]     \n\t"
-    "movz      %[v_reg],        %[const_max_level], %[temp0]               \n\t"
-    "addu      %[cost],         %[cost],            %[temp1]               \n\t"
-    "sll       %[v_reg],        %[v_reg],           1                      \n\t"
-    "sll       %[ctx_reg],      %[ctx_reg],         2                      \n\t"
-    "addu      %[v_reg],        %[v_reg],           %[t]                   \n\t"
-    "lhu       %[temp0],        0(%[v_reg])                                \n\t"
-    "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]         \n\t"
-    "addu      %[t],            %[p_costs],         %[ctx_reg]             \n\t"
-    "addu      %[cost],         %[cost],            %[temp0]               \n\t"
-    "addiu     %[res_coeffs],   %[res_coeffs],      2                      \n\t"
-    "bne       %[n],            %[res_last],        1b                     \n\t"
-    " lw       %[t],            0(%[t])                                    \n\t"
-  "2:                                                                      \n\t"
-    ".set      pop                                                         \n\t"
-    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
-      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
-      [temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
-    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
-      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
-      [inc_p_costs]"r"(inc_p_costs)
-    : "memory"
-  );
+  __asm__ volatile(
+      ".set    push                                                     \n\t"
+      ".set    noreorder                                                \n\t"
+      "subu    %[temp1],      %[res_last],        %[n]                  \n\t"
+      "sll     %[temp0],      %[n],               1                     \n\t"
+      "blez    %[temp1],      2f                                        \n\t"
+      " addu   %[res_coeffs], %[res_coeffs],      %[temp0]              \n\t"
+      "1:                                                               \n\t"
+      "lh      %[v_reg],      0(%[res_coeffs])                          \n\t"
+      "addiu   %[n],          %[n],               1                     \n\t"
+      "negu    %[temp0],      %[v_reg]                                  \n\t"
+      "slti    %[temp1],      %[v_reg],           0                     \n\t"
+      "movn    %[v_reg],      %[temp0],           %[temp1]              \n\t"
+      "sltiu   %[temp0],      %[v_reg],           2                     \n\t"
+      "move    %[ctx_reg],    %[v_reg]                                  \n\t"
+      "movz    %[ctx_reg],    %[const_2],         %[temp0]              \n\t"
+      "sll     %[temp1],      %[v_reg],           1                     \n\t"
+      "addu    %[temp1],      %[temp1],           %[VP8LevelFixedCosts] \n\t"
+      "lhu     %[temp1],      0(%[temp1])                               \n\t"
+      "slt     %[temp0],      %[v_reg],           %[const_max_level]    \n\t"
+      "movz    %[v_reg],      %[const_max_level], %[temp0]              \n\t"
+      "addu    %[cost],       %[cost],            %[temp1]              \n\t"
+      "sll     %[v_reg],      %[v_reg],           1                     \n\t"
+      "sll     %[ctx_reg],    %[ctx_reg],         2                     \n\t"
+      "addu    %[v_reg],      %[v_reg],           %[t]                  \n\t"
+      "lhu     %[temp0],      0(%[v_reg])                               \n\t"
+      "addu    %[p_costs],    %[p_costs],         %[inc_p_costs]        \n\t"
+      "addu    %[t],          %[p_costs],         %[ctx_reg]            \n\t"
+      "addu    %[cost],       %[cost],            %[temp0]              \n\t"
+      "addiu   %[res_coeffs], %[res_coeffs],      2                     \n\t"
+      "bne     %[n],          %[res_last],        1b                    \n\t"
+      " lw     %[t],          0(%[t])                                   \n\t"
+      "2:                                                               \n\t"
+      ".set    pop                                                      \n\t"
+      : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
+        [ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
+        [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
+        [res_coeffs] "+&r"(res_coeffs)
+      : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
+        [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
+        [inc_p_costs] "r"(inc_p_costs)
+      : "memory");

  // Last coefficient is always non-zero
  {
@@ -102,37 +102,35 @@ static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
  int temp0, temp1, temp2, n, n1;
  assert(res->first == 0 || coeffs[0] == 0);

-  __asm__ volatile (
-    ".set     push                                      \n\t"
-    ".set     noreorder                                 \n\t"
-    "addiu    %[p_coeffs],   %[p_coeffs],    28         \n\t"
-    "li       %[n],          15                         \n\t"
-    "li       %[temp2],      -1                         \n\t"
-  "0:                                                   \n\t"
-    "ulw      %[temp0],      0(%[p_coeffs])             \n\t"
-    "beqz     %[temp0],      1f                         \n\t"
+  __asm__ volatile(
+      ".set     push                                      \n\t"
+      ".set     noreorder                                 \n\t"
+      "addiu    %[p_coeffs],   %[p_coeffs],    28         \n\t"
+      "li       %[n],          15                         \n\t"
+      "li       %[temp2],      -1                         \n\t"
+      "0:                                                   \n\t"
+      "ulw      %[temp0],      0(%[p_coeffs])             \n\t"
+      "beqz     %[temp0],      1f                         \n\t"
 #if defined(WORDS_BIGENDIAN)
-    " sll     %[temp1],      %[temp0],       16         \n\t"
+      " sll     %[temp1],      %[temp0],       16         \n\t"
 #else
-    " srl     %[temp1],      %[temp0],       16         \n\t"
+      " srl     %[temp1],      %[temp0],       16         \n\t"
 #endif
-    "addiu    %[n1],         %[n],           -1         \n\t"
-    "movz     %[temp0],      %[n1],          %[temp1]   \n\t"
-    "movn     %[temp0],      %[n],           %[temp1]   \n\t"
-    "j        2f                                        \n\t"
-    " addiu   %[temp2],      %[temp0],       0          \n\t"
-  "1:                                                   \n\t"
-    "addiu    %[n],          %[n],           -2         \n\t"
-    "bgtz     %[n],          0b                         \n\t"
-    " addiu   %[p_coeffs],   %[p_coeffs],    -4         \n\t"
-  "2:                                                   \n\t"
-    ".set     pop                                       \n\t"
-    : [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
-      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [n]"=&r"(n), [n1]"=&r"(n1)
-    :
-    : "memory"
-  );
+      "addiu    %[n1],         %[n],           -1         \n\t"
+      "movz     %[temp0],      %[n1],          %[temp1]   \n\t"
+      "movn     %[temp0],      %[n],           %[temp1]   \n\t"
+      "j        2f                                        \n\t"
+      " addiu   %[temp2],      %[temp0],       0          \n\t"
+      "1:                                                   \n\t"
+      "addiu    %[n],          %[n],           -2         \n\t"
+      "bgtz     %[n],          0b                         \n\t"
+      " addiu   %[p_coeffs],   %[p_coeffs],    -4         \n\t"
+      "2:                                                   \n\t"
+      ".set     pop                                       \n\t"
+      : [p_coeffs] "+&r"(p_coeffs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
+        [temp2] "=&r"(temp2), [n] "=&r"(n), [n1] "=&r"(n1)
+      :
+      : "memory");
  res->last = temp2;
  res->coeffs = coeffs;
 }
--- a/src/dsp/cost_mips_dsp_r2.c
+++ b/src/dsp/cost_mips_dsp_r2.c
@@ -38,43 +38,44 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }

-  __asm__ volatile (
-    ".set      push                                                     \n\t"
-    ".set      noreorder                                                \n\t"
-    "subu      %[temp1],        %[res_last],        %[n]                \n\t"
-    "blez      %[temp1],        2f                                      \n\t"
-    " nop                                                               \n\t"
-  "1:                                                                   \n\t"
-    "sll       %[temp0],        %[n],               1                   \n\t"
-    "lhx       %[v_reg],        %[temp0](%[res_coeffs])                 \n\t"
-    "addiu     %[n],            %[n],               1                   \n\t"
-    "absq_s.w  %[v_reg],        %[v_reg]                                \n\t"
-    "sltiu     %[temp0],        %[v_reg],           2                   \n\t"
-    "move      %[ctx_reg],      %[v_reg]                                \n\t"
-    "movz      %[ctx_reg],      %[const_2],         %[temp0]            \n\t"
-    "sll       %[temp1],        %[v_reg],           1                   \n\t"
-    "lhx       %[temp1],        %[temp1](%[VP8LevelFixedCosts])         \n\t"
-    "slt       %[temp0],        %[v_reg],           %[const_max_level]  \n\t"
-    "movz      %[v_reg],        %[const_max_level], %[temp0]            \n\t"
-    "addu      %[cost],         %[cost],            %[temp1]            \n\t"
-    "sll       %[v_reg],        %[v_reg],           1                   \n\t"
-    "sll       %[ctx_reg],      %[ctx_reg],         2                   \n\t"
-    "lhx       %[temp0],        %[v_reg](%[t])                          \n\t"
-    "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]      \n\t"
-    "addu      %[t],            %[p_costs],         %[ctx_reg]          \n\t"
-    "addu      %[cost],         %[cost],            %[temp0]            \n\t"
-    "bne       %[n],            %[res_last],        1b                  \n\t"
-    " lw       %[t],            0(%[t])                                 \n\t"
-  "2:                                                                   \n\t"
-    ".set      pop                                                      \n\t"
-    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
-      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
-      [temp1]"=&r"(temp1)
-    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
-      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
-      [res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs)
-    : "memory"
-  );
+  __asm__ volatile(
+      ".set      push                                                     \n\t"
+      ".set      noreorder                                                \n\t"
+      "subu      %[temp1],        %[res_last],        %[n]                \n\t"
+      "blez      %[temp1],        2f                                      \n\t"
+      " nop                                                               \n\t"
+      "1:                                                                   "
+      "\n\t"
+      "sll       %[temp0],        %[n],               1                   \n\t"
+      "lhx       %[v_reg],        %[temp0](%[res_coeffs])                 \n\t"
+      "addiu     %[n],            %[n],               1                   \n\t"
+      "absq_s.w  %[v_reg],        %[v_reg]                                \n\t"
+      "sltiu     %[temp0],        %[v_reg],           2                   \n\t"
+      "move      %[ctx_reg],      %[v_reg]                                \n\t"
+      "movz      %[ctx_reg],      %[const_2],         %[temp0]            \n\t"
+      "sll       %[temp1],        %[v_reg],           1                   \n\t"
+      "lhx       %[temp1],        %[temp1](%[VP8LevelFixedCosts])         \n\t"
+      "slt       %[temp0],        %[v_reg],           %[const_max_level]  \n\t"
+      "movz      %[v_reg],        %[const_max_level], %[temp0]            \n\t"
+      "addu      %[cost],         %[cost],            %[temp1]            \n\t"
+      "sll       %[v_reg],        %[v_reg],           1                   \n\t"
+      "sll       %[ctx_reg],      %[ctx_reg],         2                   \n\t"
+      "lhx       %[temp0],        %[v_reg](%[t])                          \n\t"
+      "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]      \n\t"
+      "addu      %[t],            %[p_costs],         %[ctx_reg]          \n\t"
+      "addu      %[cost],         %[cost],            %[temp0]            \n\t"
+      "bne       %[n],            %[res_last],        1b                  \n\t"
+      " lw       %[t],            0(%[t])                                 \n\t"
+      "2:                                                                   "
+      "\n\t"
+      ".set      pop                                                      \n\t"
+      : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
+        [ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
+        [temp0] "=&r"(temp0), [temp1] "=&r"(temp1)
+      : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
+        [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
+        [res_coeffs] "r"(res_coeffs), [inc_p_costs] "r"(inc_p_costs)
+      : "memory");

  // Last coefficient is always non-zero
  {
--- a/src/dsp/cost_neon.c
+++ b/src/dsp/cost_neon.c
@@ -16,8 +16,8 @@
 #include "src/dsp/neon.h"
 #include "src/enc/cost_enc.h"

-static const uint8_t position[16] = { 1, 2,  3,  4,  5,  6,  7,  8,
-                                      9, 10, 11, 12, 13, 14, 15, 16 };
+static const uint8_t position[16] = {1, 2,  3,  4,  5,  6,  7,  8,
+                                     9, 10, 11, 12, 13, 14, 15, 16};

 static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
                                   VP8Residual* WEBP_RESTRICT const res) {
@@ -65,7 +65,7 @@ static int GetResidualCost_NEON(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }

-  {   // precompute clamped levels and contexts, packed to 8b.
+  {  // precompute clamped levels and contexts, packed to 8b.
    const uint8x16_t kCst2 = vdupq_n_u8(2);
    const uint8x16_t kCst67 = vdupq_n_u8(MAX_VARIABLE_LEVEL);
    const int16x8_t c0 = vld1q_s16(res->coeffs);
@@ -85,7 +85,7 @@ static int GetResidualCost_NEON(int ctx0, const VP8Residual* const res) {
  for (; n < res->last; ++n) {
    const int ctx = ctxs[n];
    const int level = levels[n];
-    const int flevel = abs_levels[n];   // full level
+    const int flevel = abs_levels[n];               // full level
    cost += VP8LevelFixedCosts[flevel] + t[level];  // simplified VP8LevelCost()
    t = costs[n + 1][ctx];
  }
--- a/src/dsp/cost_sse2.c
+++ b/src/dsp/cost_sse2.c
@@ -14,15 +14,14 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
+#include <assert.h>
 #include <emmintrin.h>

-#include <assert.h>
-
-#include "src/webp/types.h"
 #include "src/dsp/cpu.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------

@@ -63,7 +62,7 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
    return VP8BitCost(0, p0);
  }

-  {   // precompute clamped levels and contexts, packed to 8b.
+  {  // precompute clamped levels and contexts, packed to 8b.
    const __m128i zero = _mm_setzero_si128();
    const __m128i kCst2 = _mm_set1_epi8(2);
    const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
@@ -71,11 +70,11 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
    const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);
    const __m128i D0 = _mm_sub_epi16(zero, c0);
    const __m128i D1 = _mm_sub_epi16(zero, c1);
-    const __m128i E0 = _mm_max_epi16(c0, D0);   // abs(v), 16b
+    const __m128i E0 = _mm_max_epi16(c0, D0);  // abs(v), 16b
    const __m128i E1 = _mm_max_epi16(c1, D1);
    const __m128i F = _mm_packs_epi16(E0, E1);
-    const __m128i G = _mm_min_epu8(F, kCst2);    // context = 0,1,2
-    const __m128i H = _mm_min_epu8(F, kCst67);   // clamp_level in [0..67]
+    const __m128i G = _mm_min_epu8(F, kCst2);   // context = 0,1,2
+    const __m128i H = _mm_min_epu8(F, kCst67);  // clamp_level in [0..67]

    _mm_storeu_si128((__m128i*)&ctxs[0], G);
    _mm_storeu_si128((__m128i*)&levels[0], H);
@@ -86,7 +85,7 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
  for (; n < res->last; ++n) {
    const int ctx = ctxs[n];
    const int level = levels[n];
-    const int flevel = abs_levels[n];   // full level
+    const int flevel = abs_levels[n];               // full level
    cost += VP8LevelFixedCosts[flevel] + t[level];  // simplified VP8LevelCost()
    t = costs[n + 1][ctx];
  }
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -33,19 +33,20 @@
 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "mov %%ebx, %%edi\n"
-    "cpuid\n"
-    "xchg %%edi, %%ebx\n"
-    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type), "c"(0));
+  __asm__ volatile(
+      "mov %%ebx, %%edi\n"
+      "cpuid\n"
+      "xchg %%edi, %%ebx\n"
+      : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
+        "=d"(cpu_info[3])
+      : "a"(info_type), "c"(0));
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "cpuid\n"
-    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type), "c"(0));
+  __asm__ volatile("cpuid\n"
+                   : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
+                     "=d"(cpu_info[3])
+                   : "a"(info_type), "c"(0));
 }
 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))

@@ -67,13 +68,13 @@ static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
-  __asm__ volatile (
-    ".byte 0x0f, 0x01, 0xd0\n"
-    : "=a"(eax), "=d"(edx) : "c" (ecx));
+  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
+                   : "=a"(eax), "=d"(edx)
+                   : "c"(ecx));
  return ((uint64_t)edx << 32) | eax;
 }
-#elif (defined(_M_X64) || defined(_M_IX86)) && \
-      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
+    _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
 #include <immintrin.h>
 #define xgetbv() _xgetbv(0)
 #elif defined(_MSC_VER) && defined(_M_IX86)
@@ -100,8 +101,8 @@ static int CheckSlowModel(int info) {
  // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
-    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
-    0x1c, 0x26, 0x27   // Atom Microarchitecture
+      0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
+      0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
  const uint32_t family = (info >> 8) & 0xf;
@@ -130,7 +131,7 @@ static int x86CPUInfo(CPUFeature feature) {
    const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
    is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
                cpu_info[2] == VENDOR_ID_INTEL_ECX &&
-                cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
+                cpu_info[3] == VENDOR_ID_INTEL_EDX);  // genuine Intel?
  }

  GetCPUInfo(cpu_info, 1);
@@ -141,7 +142,7 @@ static int x86CPUInfo(CPUFeature feature) {
    return !!(cpu_info[2] & (1 << 0));
  }
  if (feature == kSlowSSSE3) {
-    if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
+    if (is_intel && (cpu_info[2] & (1 << 9))) {  // SSSE3?
      return CheckSlowModel(cpu_info[0]);
    }
    return 0;
@@ -179,7 +180,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
 }
 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
-#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
+#elif defined(EMSCRIPTEN)         // also needs to be before generic NEON test
 // Use compile flags as an indicator of SIMD support instead of a runtime check.
 static int wasmCPUInfo(CPUFeature feature) {
  switch (feature) {
@@ -234,14 +235,13 @@ static int armCPUInfo(CPUFeature feature) {
 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
 #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
-      defined(WEBP_USE_MSA)
+    defined(WEBP_USE_MSA)
 static int mipsCPUInfo(CPUFeature feature) {
  if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
    return 1;
  } else {
    return 0;
  }
-
 }
 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
--- a/src/dsp/cpu.h
+++ b/src/dsp/cpu.h
@@ -119,7 +119,7 @@
 // inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
 // arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
 // vtbl4_u8(); a fix was made in 16.6.
-#if defined(_MSC_VER) && \
+#if defined(_MSC_VER) &&                      \
    ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
     (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
 #define WEBP_USE_NEON
@@ -192,6 +192,7 @@
 #if defined(WEBP_USE_THREAD) && !defined(_WIN32)
 #include <pthread.h>  // NOLINT

+// clang-format off
 #define WEBP_DSP_INIT(func)                                         \
  do {                                                              \
    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
@@ -202,7 +203,9 @@
    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
    (void)pthread_mutex_unlock(&func##_lock);                       \
  } while (0)
-#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+// clang-format on
+#else   // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+// clang-format off
 #define WEBP_DSP_INIT(func)                               \
  do {                                                    \
    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
@@ -211,6 +214,7 @@
    func();                                               \
    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
  } while (0)
+// clang-format on
 #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)

 // Defines an Init + helper function that control multiple initialization of
--- a/src/dsp/dec.c
+++ b/src/dsp/dec.c
@@ -34,13 +34,14 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))

-#define STORE2(y, dc, d, c) do {    \
-  const int DC = (dc);              \
-  STORE(0, y, DC + (d));            \
-  STORE(1, y, DC + (c));            \
-  STORE(2, y, DC - (c));            \
-  STORE(3, y, DC - (d));            \
-} while (0)
+#define STORE2(y, dc, d, c) \
+  do {                      \
+    const int DC = (dc);    \
+    STORE(0, y, DC + (d));  \
+    STORE(1, y, DC + (c));  \
+    STORE(2, y, DC - (c));  \
+    STORE(3, y, DC - (d));  \
+  } while (0)

 #if !WEBP_NEON_OMIT_C_CODE
 static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
@@ -48,17 +49,17 @@ static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
  int C[4 * 4], *tmp;
  int i;
  tmp = C;
-  for (i = 0; i < 4; ++i) {    // vertical pass
-    const int a = in[0] + in[8];    // [-4096, 4094]
-    const int b = in[0] - in[8];    // [-4095, 4095]
+  for (i = 0; i < 4; ++i) {       // vertical pass
+    const int a = in[0] + in[8];  // [-4096, 4094]
+    const int b = in[0] - in[8];  // [-4095, 4095]
    const int c = WEBP_TRANSFORM_AC3_MUL2(in[4]) -
                  WEBP_TRANSFORM_AC3_MUL1(in[12]);  // [-3783, 3783]
    const int d = WEBP_TRANSFORM_AC3_MUL1(in[4]) +
                  WEBP_TRANSFORM_AC3_MUL2(in[12]);  // [-3785, 3781]
-    tmp[0] = a + d;   // [-7881, 7875]
-    tmp[1] = b + c;   // [-7878, 7878]
-    tmp[2] = b - c;   // [-7878, 7878]
-    tmp[3] = a - d;   // [-7877, 7879]
+    tmp[0] = a + d;                                 // [-7881, 7875]
+    tmp[1] = b + c;                                 // [-7878, 7878]
+    tmp[2] = b - c;                                 // [-7878, 7878]
+    tmp[3] = a - d;                                 // [-7877, 7879]
    tmp += 4;
    in++;
  }
@@ -70,10 +71,10 @@ static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
  // In the worst case scenario, the input to clip_8b() can be as large as
  // [-60713, 60968].
  tmp = C;
-  for (i = 0; i < 4; ++i) {    // horizontal pass
+  for (i = 0; i < 4; ++i) {  // horizontal pass
    const int dc = tmp[0] + 4;
-    const int a =  dc +  tmp[8];
-    const int b =  dc -  tmp[8];
+    const int a = dc + tmp[8];
+    const int b = dc - tmp[8];
    const int c =
        WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]);
    const int d =
@@ -150,21 +151,21 @@ static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
  int i;
  for (i = 0; i < 4; ++i) {
    const int a0 = in[0 + i] + in[12 + i];
-    const int a1 = in[4 + i] + in[ 8 + i];
-    const int a2 = in[4 + i] - in[ 8 + i];
+    const int a1 = in[4 + i] + in[8 + i];
+    const int a2 = in[4 + i] - in[8 + i];
    const int a3 = in[0 + i] - in[12 + i];
-    tmp[0  + i] = a0 + a1;
-    tmp[8  + i] = a0 - a1;
-    tmp[4  + i] = a3 + a2;
+    tmp[0 + i] = a0 + a1;
+    tmp[8 + i] = a0 - a1;
+    tmp[4 + i] = a3 + a2;
    tmp[12 + i] = a3 - a2;
  }
  for (i = 0; i < 4; ++i) {
-    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
-    const int a0 = dc             + tmp[3 + i * 4];
+    const int dc = tmp[0 + i * 4] + 3;  // w/ rounder
+    const int a0 = dc + tmp[3 + i * 4];
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
-    const int a3 = dc             - tmp[3 + i * 4];
-    out[ 0] = (a0 + a1) >> 3;
+    const int a3 = dc - tmp[3 + i * 4];
+    out[0] = (a0 + a1) >> 3;
    out[16] = (a3 + a2) >> 3;
    out[32] = (a0 - a1) >> 3;
    out[48] = (a3 - a2) >> 3;
@@ -194,21 +195,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
    dst += BPS;
  }
 }
-static void TM4_C(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
 static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
-static void TM16_C(uint8_t* dst)  { TrueMotion(dst, 16); }
+static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }

 //------------------------------------------------------------------------------
 // 16x16

-static void VE16_C(uint8_t* dst) {     // vertical
+static void VE16_C(uint8_t* dst) {  // vertical
  int j;
  for (j = 0; j < 16; ++j) {
    memcpy(dst + j * BPS, dst - BPS, 16);
  }
 }

-static void HE16_C(uint8_t* dst) {     // horizontal
+static void HE16_C(uint8_t* dst) {  // horizontal
  int j;
  for (j = 16; j > 0; --j) {
    memset(dst, dst[-1], 16);
@@ -223,7 +224,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
  }
 }

-static void DC16_C(uint8_t* dst) {    // DC
+static void DC16_C(uint8_t* dst) {  // DC
  int DC = 16;
  int j;
  for (j = 0; j < 16; ++j) {
@@ -232,7 +233,7 @@ static void DC16_C(uint8_t* dst) {    // DC
  Put16(DC >> 5, dst);
 }

-static void DC16NoTop_C(uint8_t* dst) {   // DC with top samples not available
+static void DC16NoTop_C(uint8_t* dst) {  // DC with top samples not available
  int DC = 8;
  int j;
  for (j = 0; j < 16; ++j) {
@@ -264,13 +265,13 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)

 #if !WEBP_NEON_OMIT_C_CODE
-static void VE4_C(uint8_t* dst) {    // vertical
+static void VE4_C(uint8_t* dst) {  // vertical
  const uint8_t* top = dst - BPS;
  const uint8_t vals[4] = {
-    AVG3(top[-1], top[0], top[1]),
-    AVG3(top[ 0], top[1], top[2]),
-    AVG3(top[ 1], top[2], top[3]),
-    AVG3(top[ 2], top[3], top[4])
+      AVG3(top[-1], top[0], top[1]),
+      AVG3(top[0], top[1], top[2]),
+      AVG3(top[1], top[2], top[3]),
+      AVG3(top[2], top[3], top[4]),
  };
  int i;
  for (i = 0; i < 4; ++i) {
@@ -279,7 +280,7 @@ static void VE4_C(uint8_t* dst) {    // vertical
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE

-static void HE4_C(uint8_t* dst) {    // horizontal
+static void HE4_C(uint8_t* dst) {  // horizontal
  const int A = dst[-1 - BPS];
  const int B = dst[-1];
  const int C = dst[-1 + BPS];
@@ -292,7 +293,7 @@ static void HE4_C(uint8_t* dst) {    // horizontal
 }

 #if !WEBP_NEON_OMIT_C_CODE
-static void DC4_C(uint8_t* dst) {   // DC
+static void DC4_C(uint8_t* dst) {  // DC
  uint32_t dc = 4;
  int i;
  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -300,7 +301,7 @@ static void DC4_C(uint8_t* dst) {   // DC
  for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
 }

-static void RD4_C(uint8_t* dst) {   // Down-right
+static void RD4_C(uint8_t* dst) {  // Down-right
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -310,16 +311,16 @@ static void RD4_C(uint8_t* dst) {   // Down-right
  const int B = dst[1 - BPS];
  const int C = dst[2 - BPS];
  const int D = dst[3 - BPS];
-  DST(0, 3)                                     = AVG3(J, K, L);
-  DST(1, 3) = DST(0, 2)                         = AVG3(I, J, K);
-  DST(2, 3) = DST(1, 2) = DST(0, 1)             = AVG3(X, I, J);
+  DST(0, 3) = AVG3(J, K, L);
+  DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
+  DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
  DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
-              DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
-                          DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
-                                      DST(3, 0) = AVG3(D, C, B);
+  DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
+  DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
+  DST(3, 0) = AVG3(D, C, B);
 }

-static void LD4_C(uint8_t* dst) {   // Down-Left
+static void LD4_C(uint8_t* dst) {  // Down-Left
  const int A = dst[0 - BPS];
  const int B = dst[1 - BPS];
  const int C = dst[2 - BPS];
@@ -328,17 +329,17 @@ static void LD4_C(uint8_t* dst) {   // Down-Left
  const int F = dst[5 - BPS];
  const int G = dst[6 - BPS];
  const int H = dst[7 - BPS];
-  DST(0, 0)                                     = AVG3(A, B, C);
-  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
-  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(0, 0) = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
-              DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
-                          DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
-                                      DST(3, 3) = AVG3(G, H, H);
+  DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+  DST(3, 3) = AVG3(G, H, H);
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE

-static void VR4_C(uint8_t* dst) {   // Vertical-Right
+static void VR4_C(uint8_t* dst) {  // Vertical-Right
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -350,17 +351,17 @@ static void VR4_C(uint8_t* dst) {   // Vertical-Right
  DST(0, 0) = DST(1, 2) = AVG2(X, A);
  DST(1, 0) = DST(2, 2) = AVG2(A, B);
  DST(2, 0) = DST(3, 2) = AVG2(B, C);
-  DST(3, 0)             = AVG2(C, D);
+  DST(3, 0) = AVG2(C, D);

-  DST(0, 3) =             AVG3(K, J, I);
-  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 3) = AVG3(K, J, I);
+  DST(0, 2) = AVG3(J, I, X);
  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
-  DST(3, 1) =             AVG3(B, C, D);
+  DST(3, 1) = AVG3(B, C, D);
 }

-static void VL4_C(uint8_t* dst) {   // Vertical-Left
+static void VL4_C(uint8_t* dst) {  // Vertical-Left
  const int A = dst[0 - BPS];
  const int B = dst[1 - BPS];
  const int C = dst[2 - BPS];
@@ -369,32 +370,31 @@ static void VL4_C(uint8_t* dst) {   // Vertical-Left
  const int F = dst[5 - BPS];
  const int G = dst[6 - BPS];
  const int H = dst[7 - BPS];
-  DST(0, 0) =             AVG2(A, B);
+  DST(0, 0) = AVG2(A, B);
  DST(1, 0) = DST(0, 2) = AVG2(B, C);
  DST(2, 0) = DST(1, 2) = AVG2(C, D);
  DST(3, 0) = DST(2, 2) = AVG2(D, E);

-  DST(0, 1) =             AVG3(A, B, C);
+  DST(0, 1) = AVG3(A, B, C);
  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
-              DST(3, 2) = AVG3(E, F, G);
-              DST(3, 3) = AVG3(F, G, H);
+  DST(3, 2) = AVG3(E, F, G);
+  DST(3, 3) = AVG3(F, G, H);
 }

-static void HU4_C(uint8_t* dst) {   // Horizontal-Up
+static void HU4_C(uint8_t* dst) {  // Horizontal-Up
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
  const int L = dst[-1 + 3 * BPS];
-  DST(0, 0) =             AVG2(I, J);
+  DST(0, 0) = AVG2(I, J);
  DST(2, 0) = DST(0, 1) = AVG2(J, K);
  DST(2, 1) = DST(0, 2) = AVG2(K, L);
-  DST(1, 0) =             AVG3(I, J, K);
+  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
-    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }

 static void HD4_C(uint8_t* dst) {  // Horizontal-Down
@@ -410,14 +410,14 @@ static void HD4_C(uint8_t* dst) {  // Horizontal-Down
  DST(0, 0) = DST(2, 1) = AVG2(I, X);
  DST(0, 1) = DST(2, 2) = AVG2(J, I);
  DST(0, 2) = DST(2, 3) = AVG2(K, J);
-  DST(0, 3)             = AVG2(L, K);
+  DST(0, 3) = AVG2(L, K);

-  DST(3, 0)             = AVG3(A, B, C);
-  DST(2, 0)             = AVG3(X, A, B);
+  DST(3, 0) = AVG3(A, B, C);
+  DST(2, 0) = AVG3(X, A, B);
  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
-  DST(1, 3)             = AVG3(L, K, J);
+  DST(1, 3) = AVG3(L, K, J);
 }

 #undef DST
@@ -430,14 +430,14 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
 // Chroma

 #if !WEBP_NEON_OMIT_C_CODE
-static void VE8uv_C(uint8_t* dst) {    // vertical
+static void VE8uv_C(uint8_t* dst) {  // vertical
  int j;
  for (j = 0; j < 8; ++j) {
    memcpy(dst + j * BPS, dst - BPS, 8);
  }
 }

-static void HE8uv_C(uint8_t* dst) {    // horizontal
+static void HE8uv_C(uint8_t* dst) {  // horizontal
  int j;
  for (j = 0; j < 8; ++j) {
    memset(dst, dst[-1], 8);
@@ -453,7 +453,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
  }
 }

-static void DC8uv_C(uint8_t* dst) {     // DC
+static void DC8uv_C(uint8_t* dst) {  // DC
  int dc0 = 8;
  int i;
  for (i = 0; i < 8; ++i) {
@@ -462,7 +462,7 @@ static void DC8uv_C(uint8_t* dst) {     // DC
  Put8x8uv(dc0 >> 4, dst);
 }

-static void DC8uvNoLeft_C(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft_C(uint8_t* dst) {  // DC with no left samples
  int dc0 = 4;
  int i;
  for (i = 0; i < 8; ++i) {
@@ -480,7 +480,7 @@ static void DC8uvNoTop_C(uint8_t* dst) {  // DC with no top samples
  Put8x8uv(dc0 >> 3, dst);
 }

-static void DC8uvNoTopLeft_C(uint8_t* dst) {    // DC with nothing
+static void DC8uvNoTopLeft_C(uint8_t* dst) {  // DC with nothing
  Put8x8uv(0x80, dst);
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
@@ -493,46 +493,46 @@ VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 // 4 pixels in, 2 pixels out
 static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  p[-step] = VP8kclip1[p0 + a2];
-  p[    0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
 }

 // 4 pixels in, 4 pixels out
 static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0);
  const int a1 = VP8ksclip2[(a + 4) >> 3];
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  const int a3 = (a1 + 1) >> 1;
-  p[-2*step] = VP8kclip1[p1 + a3];
-  p[-  step] = VP8kclip1[p0 + a2];
-  p[      0] = VP8kclip1[q0 - a1];
-  p[   step] = VP8kclip1[q1 - a3];
+  p[-2 * step] = VP8kclip1[p1 + a3];
+  p[-step] = VP8kclip1[p0 + a2];
+  p[0] = VP8kclip1[q0 - a1];
+  p[step] = VP8kclip1[q1 - a3];
 }

 // 6 pixels in, 6 pixels out
 static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
-  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
-  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
+  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
-  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
-  p[-3*step] = VP8kclip1[p2 + a3];
-  p[-2*step] = VP8kclip1[p1 + a2];
-  p[-  step] = VP8kclip1[p0 + a1];
-  p[      0] = VP8kclip1[q0 - a1];
-  p[   step] = VP8kclip1[q1 - a2];
-  p[ 2*step] = VP8kclip1[q2 - a3];
+  const int a3 = (9 * a + 63) >> 7;   // eq. to ((1 * a + 7) * 9) >> 7
+  p[-3 * step] = VP8kclip1[p2 + a3];
+  p[-2 * step] = VP8kclip1[p1 + a2];
+  p[-step] = VP8kclip1[p0 + a1];
+  p[0] = VP8kclip1[q0 - a1];
+  p[step] = VP8kclip1[q1 - a2];
+  p[2 * step] = VP8kclip1[q2 - a3];
 }

 static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
@@ -545,8 +545,8 @@ static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
 #endif  // !WEBP_NEON_OMIT_C_CODE

 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
-                                      int step, int t, int it) {
+static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, int step, int t,
+                                      int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
  const int p0 = p[-step], q0 = p[0];
  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -602,9 +602,8 @@ static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
 // Complex In-loop filtering (Paragraph 15.3)

 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
-                                       int hstride, int vstride, int size,
-                                       int thresh, int ithresh,
+static WEBP_INLINE void FilterLoop26_C(uint8_t* p, int hstride, int vstride,
+                                       int size, int thresh, int ithresh,
                                       int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
@@ -619,9 +618,8 @@ static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
  }
 }

-static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
-                                       int hstride, int vstride, int size,
-                                       int thresh, int ithresh,
+static WEBP_INLINE void FilterLoop24_C(uint8_t* p, int hstride, int vstride,
+                                       int size, int thresh, int ithresh,
                                       int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
@@ -639,19 +637,19 @@ static WEBP_INLINE void FilterLoop24_C(uint8_t* p,

 #if !WEBP_NEON_OMIT_C_CODE
 // on macroblock edges
-static void VFilter16_C(uint8_t* p, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
+static void VFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
+                        int hev_thresh) {
  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }

-static void HFilter16_C(uint8_t* p, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
+static void HFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
+                        int hev_thresh) {
  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }

 // on three inner edges
-static void VFilter16i_C(uint8_t* p, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
+                         int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
@@ -661,8 +659,8 @@ static void VFilter16i_C(uint8_t* p, int stride,
 #endif  // !WEBP_NEON_OMIT_C_CODE

 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static void HFilter16i_C(uint8_t* p, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
+                         int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
--- a/src/dsp/dec_clip_tables.c
+++ b/src/dsp/dec_clip_tables.c
@@ -12,323 +12,319 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include "src/dsp/cpu.h"
-#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
+#include "src/webp/types.h"

 // define to 0 to have run-time table initialization
 #if !defined(USE_STATIC_TABLES)
-#define USE_STATIC_TABLES 1   // ALTERNATE_CODE
+#define USE_STATIC_TABLES 1  // ALTERNATE_CODE
 #endif

 #if (USE_STATIC_TABLES == 1)

 static const uint8_t abs0[255 + 255 + 1] = {
-  0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
-  0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
-  0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
-  0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
-  0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
-  0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
-  0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
-  0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
-  0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
-  0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
-  0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
-  0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
-  0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
-  0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
-  0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
-  0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
-  0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
-  0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
-  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
-  0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
-  0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
-  0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
-  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
-  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
-  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
-  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
-  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
-  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
-  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
-  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
-  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
-  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
-  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
-  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
-  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
-  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
-  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
-  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
-  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
-  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
+    0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
+    0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
+    0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
+    0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
+    0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
+    0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
+    0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
+    0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
+    0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
+    0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
+    0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
+    0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
+    0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
+    0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
+    0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
+    0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
+    0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
+    0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
+    0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
+    0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
+    0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+    0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+    0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+    0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+    0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+    0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+    0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+    0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+    0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+    0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+    0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+    0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};

 static const uint8_t sclip1[1020 + 1020 + 1] = {
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-  0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
-  0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
-  0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
-  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
-  0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
-  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
-  0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
-  0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
-  0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
-  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
-  0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
-  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
-  0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
-  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
-  0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
-  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
-};
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
+    0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+    0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
+    0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
+    0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
+    0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f};

 static const uint8_t sclip2[112 + 112 + 1] = {
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
-  0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
-};
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

 static const uint8_t clip1[255 + 511 + 1] = {
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
-  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
-  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
-  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
-  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
-  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
-  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
-  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
-  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
-  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
-  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
-  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
-  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
-  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
-  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
-  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
-  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
-  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
-  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+    0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+    0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+    0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+    0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+    0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+    0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+    0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+    0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+    0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+    0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+    0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

 #else

@@ -342,7 +338,7 @@ static uint8_t clip1[255 + 511 + 1];
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;

-#endif    // USE_STATIC_TABLES
+#endif  // USE_STATIC_TABLES

 const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
 const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -367,5 +363,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
    }
    tables_ok = 1;
  }
-#endif    // USE_STATIC_TABLES
+#endif  // USE_STATIC_TABLES
 }
--- a/src/dsp/dec_mips32.c
+++ b/src/dsp/dec_mips32.c
@@ -33,7 +33,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
  const int a1 = VP8ksclip2[(a + 4) >> 3];
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  p[-step] = VP8kclip1[p0 + a2];
-  p[    0] = VP8kclip1[q0 - a1];
+  p[0] = VP8kclip1[q0 - a1];
 }

 // 4 pixels in, 4 pixels out
@@ -44,9 +44,9 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
  const int a2 = VP8ksclip2[(a + 3) >> 3];
  const int a3 = (a1 + 1) >> 1;
  p[-2 * step] = VP8kclip1[p1 + a3];
-  p[-    step] = VP8kclip1[p0 + a2];
-  p[        0] = VP8kclip1[q0 - a1];
-  p[     step] = VP8kclip1[q1 - a3];
+  p[-step] = VP8kclip1[p0 + a2];
+  p[0] = VP8kclip1[q0 - a1];
+  p[step] = VP8kclip1[q1 - a3];
 }

 // 6 pixels in, 6 pixels out
@@ -57,13 +57,13 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
-  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+  const int a3 = (9 * a + 63) >> 7;   // eq. to ((1 * a + 7) * 9) >> 7
  p[-3 * step] = VP8kclip1[p2 + a3];
  p[-2 * step] = VP8kclip1[p1 + a2];
-  p[-    step] = VP8kclip1[p0 + a1];
-  p[        0] = VP8kclip1[q0 - a1];
-  p[     step] = VP8kclip1[q1 - a2];
-  p[ 2 * step] = VP8kclip1[q2 - a3];
+  p[-step] = VP8kclip1[p0 + a1];
+  p[0] = VP8kclip1[q0 - a1];
+  p[step] = VP8kclip1[q1 - a2];
+  p[2 * step] = VP8kclip1[q2 - a3];
 }

 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
@@ -76,8 +76,8 @@ static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
  return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
 }

-static WEBP_INLINE int needs_filter2(const uint8_t* p,
-                                     int step, int t, int it) {
+static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t,
+                                     int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step];
  const int p1 = p[-2 * step], p0 = p[-step];
  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -89,9 +89,9 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
 }

-static WEBP_INLINE void FilterLoop26(uint8_t* p,
-                                     int hstride, int vstride, int size,
-                                     int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop26(uint8_t* p, int hstride, int vstride,
+                                     int size, int thresh, int ithresh,
+                                     int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -105,9 +105,9 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
  }
 }

-static WEBP_INLINE void FilterLoop24(uint8_t* p,
-                                     int hstride, int vstride, int size,
-                                     int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop24(uint8_t* p, int hstride, int vstride,
+                                     int size, int thresh, int ithresh,
+                                     int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -122,13 +122,13 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p,
 }

 // on macroblock edges
-static void VFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh,
+                      int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }

-static void HFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh,
+                      int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }

@@ -158,8 +158,8 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }

 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
@@ -167,8 +167,8 @@ static void VFilter16i(uint8_t* p, int stride,
  }
 }

-static void HFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
+                       int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
--- a/src/dsp/dec_mips_dsp_r2.c
+++ b/src/dsp/dec_mips_dsp_r2.c
--- a/src/dsp/dec_msa.c
+++ b/src/dsp/dec_msa.c
@@ -11,7 +11,6 @@
 //
 // Author(s):  Prashant Patil   (prashant.patil@imgtec.com)

-
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_MSA)
@@ -21,29 +20,30 @@
 //------------------------------------------------------------------------------
 // Transforms

-#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) {  \
-  v4i32 a1_m, b1_m, c1_m, d1_m;                                  \
-  v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m;                  \
-  const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);           \
-  const v4i32 sinpi8sqrt2 = __msa_fill_w(35468);                 \
+#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
+  {                                                              \
+    v4i32 a1_m, b1_m, c1_m, d1_m;                                \
+    v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m;                \
+    const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);         \
+    const v4i32 sinpi8sqrt2 = __msa_fill_w(35468);               \
                                                                 \
-  a1_m = in0 + in2;                                              \
-  b1_m = in0 - in2;                                              \
-  c_tmp1_m = (in1 * sinpi8sqrt2) >> 16;                          \
-  c_tmp2_m = in3 + ((in3 * cospi8sqrt2minus1) >> 16);            \
-  c1_m = c_tmp1_m - c_tmp2_m;                                    \
-  d_tmp1_m = in1 + ((in1 * cospi8sqrt2minus1) >> 16);            \
-  d_tmp2_m = (in3 * sinpi8sqrt2) >> 16;                          \
-  d1_m = d_tmp1_m + d_tmp2_m;                                    \
-  BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3);   \
-}
+    a1_m = in0 + in2;                                            \
+    b1_m = in0 - in2;                                            \
+    c_tmp1_m = (in1 * sinpi8sqrt2) >> 16;                        \
+    c_tmp2_m = in3 + ((in3 * cospi8sqrt2minus1) >> 16);          \
+    c1_m = c_tmp1_m - c_tmp2_m;                                  \
+    d_tmp1_m = in1 + ((in1 * cospi8sqrt2minus1) >> 16);          \
+    d_tmp2_m = (in3 * sinpi8sqrt2) >> 16;                        \
+    d1_m = d_tmp1_m + d_tmp2_m;                                  \
+    BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
+  }

 static void TransformOne(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst) {
  v8i16 input0, input1;
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  v16i8 dest0, dest1, dest2, dest3;

  LD_SH2(in, 8, input0, input1);
@@ -55,10 +55,10 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  LD_SB4(dst, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
-             res0, res1, res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
-             res0, res1, res2, res3);
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
+             res2, res3);
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
+             res3);
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -77,10 +77,10 @@ static void TransformTwo(const int16_t* WEBP_RESTRICT in,
 static void TransformWHT(const int16_t* WEBP_RESTRICT in,
                         int16_t* WEBP_RESTRICT out) {
  v8i16 input0, input1;
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
  v8i16 tmp0, tmp1, tmp2, tmp3;
  v8i16 out0, out1;

@@ -131,24 +131,23 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
  const int in2 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
  const int in3 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
-  v4i32 tmp0 = { 0 };
+  v4i32 tmp0 = {0};
  v4i32 out0 = __msa_fill_w(a + d4);
  v4i32 out1 = __msa_fill_w(a + c4);
  v4i32 out2 = __msa_fill_w(a - c4);
  v4i32 out3 = __msa_fill_w(a - d4);
  v4i32 res0, res1, res2, res3;
-  const v4i32 zero = { 0 };
+  const v4i32 zero = {0};
  v16u8 dest0, dest1, dest2, dest3;

  INSERT_W4_SW(in3, in2, -in2, -in3, tmp0);
-  ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0,
-       out0, out1, out2, out3);
+  ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0, out0, out1, out2, out3);
  SRAI_W4_SW(out0, out1, out2, out3, 3);
  LD_UB4(dst, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
-             res0, res1, res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
-             res0, res1, res2, res3);
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
+             res2, res3);
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
+             res3);
  ADD4(res0, out0, res1, out1, res2, out2, res3, out3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, out0, out1);
@@ -159,189 +158,198 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
 // Edge filtering functions

-#define FLIP_SIGN2(in0, in1, out0, out1) {  \
-  out0 = (v16i8)__msa_xori_b(in0, 0x80);    \
-  out1 = (v16i8)__msa_xori_b(in1, 0x80);    \
-}
+#define FLIP_SIGN2(in0, in1, out0, out1)   \
+  {                                        \
+    out0 = (v16i8)__msa_xori_b(in0, 0x80); \
+    out1 = (v16i8)__msa_xori_b(in1, 0x80); \
+  }

-#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) {  \
-  FLIP_SIGN2(in0, in1, out0, out1);                               \
-  FLIP_SIGN2(in2, in3, out2, out3);                               \
-}
+#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) \
+  {                                                            \
+    FLIP_SIGN2(in0, in1, out0, out1);                          \
+    FLIP_SIGN2(in2, in3, out2, out3);                          \
+  }

-#define FILT_VAL(q0_m, p0_m, mask, filt) do {  \
-  v16i8 q0_sub_p0;                             \
-  q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m);      \
-  filt = __msa_adds_s_b(filt, q0_sub_p0);      \
-  filt = __msa_adds_s_b(filt, q0_sub_p0);      \
-  filt = __msa_adds_s_b(filt, q0_sub_p0);      \
-  filt = filt & mask;                          \
-} while (0)
+#define FILT_VAL(q0_m, p0_m, mask, filt)    \
+  do {                                      \
+    v16i8 q0_sub_p0;                        \
+    q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
+    filt = __msa_adds_s_b(filt, q0_sub_p0); \
+    filt = __msa_adds_s_b(filt, q0_sub_p0); \
+    filt = __msa_adds_s_b(filt, q0_sub_p0); \
+    filt = filt & mask;                     \
+  } while (0)

-#define FILT2(q_m, p_m, q, p) do {            \
-  u_r = SRAI_H(temp1, 7);                     \
-  u_r = __msa_sat_s_h(u_r, 7);                \
-  u_l = SRAI_H(temp3, 7);                     \
-  u_l = __msa_sat_s_h(u_l, 7);                \
-  u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r);  \
-  q_m = __msa_subs_s_b(q_m, u);               \
-  p_m = __msa_adds_s_b(p_m, u);               \
-  q = __msa_xori_b((v16u8)q_m, 0x80);         \
-  p = __msa_xori_b((v16u8)p_m, 0x80);         \
-} while (0)
+#define FILT2(q_m, p_m, q, p)                  \
+  do {                                         \
+    u_r = SRAI_H(temp1, 7);                    \
+    u_r = __msa_sat_s_h(u_r, 7);               \
+    u_l = SRAI_H(temp3, 7);                    \
+    u_l = __msa_sat_s_h(u_l, 7);               \
+    u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
+    q_m = __msa_subs_s_b(q_m, u);              \
+    p_m = __msa_adds_s_b(p_m, u);              \
+    q = __msa_xori_b((v16u8)q_m, 0x80);        \
+    p = __msa_xori_b((v16u8)p_m, 0x80);        \
+  } while (0)

-#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) do {  \
-  v16i8 p1_m, p0_m, q0_m, q1_m;                         \
-  v16i8 filt, t1, t2;                                   \
-  const v16i8 cnst4b = __msa_ldi_b(4);                  \
-  const v16i8 cnst3b = __msa_ldi_b(3);                  \
+#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev)       \
+  do {                                                  \
+    v16i8 p1_m, p0_m, q0_m, q1_m;                       \
+    v16i8 filt, t1, t2;                                 \
+    const v16i8 cnst4b = __msa_ldi_b(4);                \
+    const v16i8 cnst3b = __msa_ldi_b(3);                \
                                                        \
-  FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m);   \
-  filt = __msa_subs_s_b(p1_m, q1_m);                    \
-  filt = filt & hev;                                    \
-  FILT_VAL(q0_m, p0_m, mask, filt);                     \
-  t1 = __msa_adds_s_b(filt, cnst4b);                    \
-  t1 = SRAI_B(t1, 3);                                   \
-  t2 = __msa_adds_s_b(filt, cnst3b);                    \
-  t2 = SRAI_B(t2, 3);                                   \
-  q0_m = __msa_subs_s_b(q0_m, t1);                      \
-  q0 = __msa_xori_b((v16u8)q0_m, 0x80);                 \
-  p0_m = __msa_adds_s_b(p0_m, t2);                      \
-  p0 = __msa_xori_b((v16u8)p0_m, 0x80);                 \
-  filt = __msa_srari_b(t1, 1);                          \
-  hev = __msa_xori_b(hev, 0xff);                        \
-  filt = filt & hev;                                    \
-  q1_m = __msa_subs_s_b(q1_m, filt);                    \
-  q1 = __msa_xori_b((v16u8)q1_m, 0x80);                 \
-  p1_m = __msa_adds_s_b(p1_m, filt);                    \
-  p1 = __msa_xori_b((v16u8)p1_m, 0x80);                 \
-} while (0)
+    FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
+    filt = __msa_subs_s_b(p1_m, q1_m);                  \
+    filt = filt & hev;                                  \
+    FILT_VAL(q0_m, p0_m, mask, filt);                   \
+    t1 = __msa_adds_s_b(filt, cnst4b);                  \
+    t1 = SRAI_B(t1, 3);                                 \
+    t2 = __msa_adds_s_b(filt, cnst3b);                  \
+    t2 = SRAI_B(t2, 3);                                 \
+    q0_m = __msa_subs_s_b(q0_m, t1);                    \
+    q0 = __msa_xori_b((v16u8)q0_m, 0x80);               \
+    p0_m = __msa_adds_s_b(p0_m, t2);                    \
+    p0 = __msa_xori_b((v16u8)p0_m, 0x80);               \
+    filt = __msa_srari_b(t1, 1);                        \
+    hev = __msa_xori_b(hev, 0xff);                      \
+    filt = filt & hev;                                  \
+    q1_m = __msa_subs_s_b(q1_m, filt);                  \
+    q1 = __msa_xori_b((v16u8)q1_m, 0x80);               \
+    p1_m = __msa_adds_s_b(p1_m, filt);                  \
+    p1 = __msa_xori_b((v16u8)p1_m, 0x80);               \
+  } while (0)

-#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) do {  \
-  v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m;                   \
-  v16i8 u, filt, t1, t2, filt_sign;                           \
-  v8i16 filt_r, filt_l, u_r, u_l;                             \
-  v8i16 temp0, temp1, temp2, temp3;                           \
-  const v16i8 cnst4b = __msa_ldi_b(4);                        \
-  const v16i8 cnst3b = __msa_ldi_b(3);                        \
-  const v8i16 cnst9h = __msa_ldi_h(9);                        \
-  const v8i16 cnst63h = __msa_ldi_h(63);                      \
+#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
+  do {                                                  \
+    v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m;           \
+    v16i8 u, filt, t1, t2, filt_sign;                   \
+    v8i16 filt_r, filt_l, u_r, u_l;                     \
+    v8i16 temp0, temp1, temp2, temp3;                   \
+    const v16i8 cnst4b = __msa_ldi_b(4);                \
+    const v16i8 cnst3b = __msa_ldi_b(3);                \
+    const v8i16 cnst9h = __msa_ldi_h(9);                \
+    const v8i16 cnst63h = __msa_ldi_h(63);              \
+                                                        \
+    FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
+    filt = __msa_subs_s_b(p1_m, q1_m);                  \
+    FILT_VAL(q0_m, p0_m, mask, filt);                   \
+    FLIP_SIGN2(p2, q2, p2_m, q2_m);                     \
+    t2 = filt & hev;                                    \
+    /* filt_val &= ~hev */                              \
+    hev = __msa_xori_b(hev, 0xff);                      \
+    filt = filt & hev;                                  \
+    t1 = __msa_adds_s_b(t2, cnst4b);                    \
+    t1 = SRAI_B(t1, 3);                                 \
+    t2 = __msa_adds_s_b(t2, cnst3b);                    \
+    t2 = SRAI_B(t2, 3);                                 \
+    q0_m = __msa_subs_s_b(q0_m, t1);                    \
+    p0_m = __msa_adds_s_b(p0_m, t2);                    \
+    filt_sign = __msa_clti_s_b(filt, 0);                \
+    ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l);       \
+    /* update q2/p2 */                                  \
+    temp0 = filt_r * cnst9h;                            \
+    temp1 = temp0 + cnst63h;                            \
+    temp2 = filt_l * cnst9h;                            \
+    temp3 = temp2 + cnst63h;                            \
+    FILT2(q2_m, p2_m, q2, p2);                          \
+    /* update q1/p1 */                                  \
+    temp1 = temp1 + temp0;                              \
+    temp3 = temp3 + temp2;                              \
+    FILT2(q1_m, p1_m, q1, p1);                          \
+    /* update q0/p0 */                                  \
+    temp1 = temp1 + temp0;                              \
+    temp3 = temp3 + temp2;                              \
+    FILT2(q0_m, p0_m, q0, p0);                          \
+  } while (0)
+
+#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
+                     limit_in, b_limit_in, thresh_in, hev_out, mask_out)     \
+  do {                                                                       \
+    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;            \
+    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;            \
+    v16u8 flat_out;                                                          \
+                                                                             \
+    /* absolute subtraction of pixel values */                               \
+    p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in);                             \
+    p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in);                             \
+    p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in);                             \
+    q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in);                             \
+    q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in);                             \
+    q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in);                             \
+    p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in);                             \
+    p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in);                             \
+    /* calculation of hev */                                                 \
+    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);                    \
+    hev_out = (thresh_in < flat_out);                                        \
+    /* calculation of mask */                                                \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);               \
+    p1_asub_q1_m = SRAI_B(p1_asub_q1_m, 1);                                  \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);               \
+    mask_out = (b_limit_in < p0_asub_q0_m);                                  \
+    mask_out = __msa_max_u_b(flat_out, mask_out);                            \
+    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);                \
+    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);                        \
+    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);                \
+    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);                        \
+    mask_out = (limit_in < mask_out);                                        \
+    mask_out = __msa_xori_b(mask_out, 0xff);                                 \
+  } while (0)
+
+#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride)       \
+  do {                                                           \
+    const uint16_t tmp0_h = __msa_copy_s_h((v8i16)in1, in1_idx); \
+    const uint32_t tmp0_w = __msa_copy_s_w((v4i32)in0, in0_idx); \
+    SW(tmp0_w, pdst);                                            \
+    SH(tmp0_h, pdst + stride);                                   \
+  } while (0)
+
+#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride)  \
+  do {                                                                  \
+    uint8_t* ptmp1 = (uint8_t*)pdst;                                    \
+    ST6x1_UB(in0, start_in0_idx, in1, start_in1_idx, ptmp1, 4);         \
+    ptmp1 += stride;                                                    \
+    ST6x1_UB(in0, start_in0_idx + 1, in1, start_in1_idx + 1, ptmp1, 4); \
+    ptmp1 += stride;                                                    \
+    ST6x1_UB(in0, start_in0_idx + 2, in1, start_in1_idx + 2, ptmp1, 4); \
+    ptmp1 += stride;                                                    \
+    ST6x1_UB(in0, start_in0_idx + 3, in1, start_in1_idx + 3, ptmp1, 4); \
+  } while (0)
+
+#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask)           \
+  do {                                                              \
+    v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2;               \
+    const v16i8 cnst4b = __msa_ldi_b(4);                            \
+    const v16i8 cnst3b = __msa_ldi_b(3);                            \
+                                                                    \
+    FLIP_SIGN4(p1_in, p0_in, q0_in, q1_in, p1_m, p0_m, q0_m, q1_m); \
+    filt = __msa_subs_s_b(p1_m, q1_m);                              \
+    FILT_VAL(q0_m, p0_m, mask, filt);                               \
+    filt1 = __msa_adds_s_b(filt, cnst4b);                           \
+    filt1 = SRAI_B(filt1, 3);                                       \
+    filt2 = __msa_adds_s_b(filt, cnst3b);                           \
+    filt2 = SRAI_B(filt2, 3);                                       \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                             \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                             \
+    q0_in = __msa_xori_b((v16u8)q0_m, 0x80);                        \
+    p0_in = __msa_xori_b((v16u8)p0_m, 0x80);                        \
+  } while (0)
+
+#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask)        \
+  do {                                                        \
+    v16u8 p1_a_sub_q1, p0_a_sub_q0;                           \
                                                              \
-  FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m);         \
-  filt = __msa_subs_s_b(p1_m, q1_m);                          \
-  FILT_VAL(q0_m, p0_m, mask, filt);                           \
-  FLIP_SIGN2(p2, q2, p2_m, q2_m);                             \
-  t2 = filt & hev;                                            \
-  /* filt_val &= ~hev */                                      \
-  hev = __msa_xori_b(hev, 0xff);                              \
-  filt = filt & hev;                                          \
-  t1 = __msa_adds_s_b(t2, cnst4b);                            \
-  t1 = SRAI_B(t1, 3);                                         \
-  t2 = __msa_adds_s_b(t2, cnst3b);                            \
-  t2 = SRAI_B(t2, 3);                                         \
-  q0_m = __msa_subs_s_b(q0_m, t1);                            \
-  p0_m = __msa_adds_s_b(p0_m, t2);                            \
-  filt_sign = __msa_clti_s_b(filt, 0);                        \
-  ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l);               \
-  /* update q2/p2 */                                          \
-  temp0 = filt_r * cnst9h;                                    \
-  temp1 = temp0 + cnst63h;                                    \
-  temp2 = filt_l * cnst9h;                                    \
-  temp3 = temp2 + cnst63h;                                    \
-  FILT2(q2_m, p2_m, q2, p2);                                  \
-  /* update q1/p1 */                                          \
-  temp1 = temp1 + temp0;                                      \
-  temp3 = temp3 + temp2;                                      \
-  FILT2(q1_m, p1_m, q1, p1);                                  \
-  /* update q0/p0 */                                          \
-  temp1 = temp1 + temp0;                                      \
-  temp3 = temp3 + temp2;                                      \
-  FILT2(q0_m, p0_m, q0, p0);                                  \
-} while (0)
+    p0_a_sub_q0 = __msa_asub_u_b(p0, q0);                     \
+    p1_a_sub_q1 = __msa_asub_u_b(p1, q1);                     \
+    p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
+    p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0);   \
+    mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1);          \
+    mask = (mask <= b_limit);                                 \
+  } while (0)

-#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in,                 \
-                     q0_in, q1_in, q2_in, q3_in,                 \
-                     limit_in, b_limit_in, thresh_in,            \
-                     hev_out, mask_out) do {                     \
-  v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;  \
-  v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;  \
-  v16u8 flat_out;                                                \
-                                                                 \
-  /* absolute subtraction of pixel values */                     \
-  p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in);                   \
-  p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in);                   \
-  p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in);                   \
-  q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in);                   \
-  q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in);                   \
-  q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in);                   \
-  p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in);                   \
-  p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in);                   \
-  /* calculation of hev */                                       \
-  flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);          \
-  hev_out = (thresh_in < flat_out);                              \
-  /* calculation of mask */                                      \
-  p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);     \
-  p1_asub_q1_m = SRAI_B(p1_asub_q1_m, 1);                        \
-  p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);     \
-  mask_out = (b_limit_in < p0_asub_q0_m);                        \
-  mask_out = __msa_max_u_b(flat_out, mask_out);                  \
-  p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);      \
-  mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);              \
-  q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);      \
-  mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);              \
-  mask_out = (limit_in < mask_out);                              \
-  mask_out = __msa_xori_b(mask_out, 0xff);                       \
-} while (0)
-
-#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) do { \
-  const uint16_t tmp0_h = __msa_copy_s_h((v8i16)in1, in1_idx);  \
-  const uint32_t tmp0_w = __msa_copy_s_w((v4i32)in0, in0_idx);  \
-  SW(tmp0_w, pdst);                                             \
-  SH(tmp0_h, pdst + stride);                                    \
-} while (0)
-
-#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride) do { \
-  uint8_t* ptmp1 = (uint8_t*)pdst;                                          \
-  ST6x1_UB(in0, start_in0_idx, in1, start_in1_idx, ptmp1, 4);               \
-  ptmp1 += stride;                                                          \
-  ST6x1_UB(in0, start_in0_idx + 1, in1, start_in1_idx + 1, ptmp1, 4);       \
-  ptmp1 += stride;                                                          \
-  ST6x1_UB(in0, start_in0_idx + 2, in1, start_in1_idx + 2, ptmp1, 4);       \
-  ptmp1 += stride;                                                          \
-  ST6x1_UB(in0, start_in0_idx + 3, in1, start_in1_idx + 3, ptmp1, 4);       \
-} while (0)
-
-#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) do {       \
-    v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2;                \
-    const v16i8 cnst4b = __msa_ldi_b(4);                             \
-    const v16i8 cnst3b =  __msa_ldi_b(3);                            \
-                                                                     \
-    FLIP_SIGN4(p1_in, p0_in, q0_in, q1_in, p1_m, p0_m, q0_m, q1_m);  \
-    filt = __msa_subs_s_b(p1_m, q1_m);                               \
-    FILT_VAL(q0_m, p0_m, mask, filt);                                \
-    filt1 = __msa_adds_s_b(filt, cnst4b);                            \
-    filt1 = SRAI_B(filt1, 3);                                        \
-    filt2 = __msa_adds_s_b(filt, cnst3b);                            \
-    filt2 = SRAI_B(filt2, 3);                                        \
-    q0_m = __msa_subs_s_b(q0_m, filt1);                              \
-    p0_m = __msa_adds_s_b(p0_m, filt2);                              \
-    q0_in = __msa_xori_b((v16u8)q0_m, 0x80);                         \
-    p0_in = __msa_xori_b((v16u8)p0_m, 0x80);                         \
-} while (0)
-
-#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) do {    \
-    v16u8 p1_a_sub_q1, p0_a_sub_q0;                            \
-                                                               \
-    p0_a_sub_q0 = __msa_asub_u_b(p0, q0);                      \
-    p1_a_sub_q1 = __msa_asub_u_b(p1, q1);                      \
-    p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1);  \
-    p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0);    \
-    mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1);           \
-    mask = (mask <= b_limit);                                  \
-} while (0)
-
-static void VFilter16(uint8_t* src, int stride,
-                      int b_limit_in, int limit_in, int thresh_in) {
+static void VFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
+                      int thresh_in) {
  uint8_t* ptemp = src - 4 * stride;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 mask, hev;
@@ -350,8 +358,8 @@ static void VFilter16(uint8_t* src, int stride,
  const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);

  LD_UB8(ptemp, stride, p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ptemp = src - 3 * stride;
  ST_UB4(p2, p1, p0, q0, ptemp, stride);
@@ -359,9 +367,9 @@ static void VFilter16(uint8_t* src, int stride,
  ST_UB2(q1, q2, ptemp, stride);
 }

-static void HFilter16(uint8_t* src, int stride,
-                      int b_limit_in, int limit_in, int thresh_in) {
-  uint8_t* ptmp  = src - 4;
+static void HFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
+                      int thresh_in) {
+  uint8_t* ptmp = src - 4;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 mask, hev;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
@@ -374,11 +382,11 @@ static void HFilter16(uint8_t* src, int stride,
  LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7);
  ptmp += (8 * stride);
  LD_UB8(ptmp, stride, row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
+                      p1, p0, q0, q1, q2, q3);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -420,8 +428,8 @@ static void HFilter16(uint8_t* src, int stride,
 }

 // on three inner edges
-static void VFilterHorEdge16i(uint8_t* src, int stride,
-                              int b_limit, int limit, int thresh) {
+static void VFilterHorEdge16i(uint8_t* src, int stride, int b_limit, int limit,
+                              int thresh) {
  v16u8 mask, hev;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  const v16u8 thresh0 = (v16u8)__msa_fill_b(thresh);
@@ -429,21 +437,21 @@ static void VFilterHorEdge16i(uint8_t* src, int stride,
  const v16u8 limit0 = (v16u8)__msa_fill_b(limit);

  LD_UB8((src - 4 * stride), stride, p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
-               hev, mask);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ST_UB4(p1, p0, q0, q1, (src - 2 * stride), stride);
 }

-static void VFilter16i(uint8_t* src_y, int stride,
-                       int b_limit, int limit, int thresh) {
-  VFilterHorEdge16i(src_y +  4 * stride, stride, b_limit, limit, thresh);
-  VFilterHorEdge16i(src_y +  8 * stride, stride, b_limit, limit, thresh);
+static void VFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
+                       int thresh) {
+  VFilterHorEdge16i(src_y + 4 * stride, stride, b_limit, limit, thresh);
+  VFilterHorEdge16i(src_y + 8 * stride, stride, b_limit, limit, thresh);
  VFilterHorEdge16i(src_y + 12 * stride, stride, b_limit, limit, thresh);
 }

-static void HFilterVertEdge16i(uint8_t* src, int stride,
-                               int b_limit, int limit, int thresh) {
+static void HFilterVertEdge16i(uint8_t* src, int stride, int b_limit, int limit,
+                               int thresh) {
  v16u8 mask, hev;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
@@ -454,13 +462,13 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
  const v16u8 limit0 = (v16u8)__msa_fill_b(limit);

  LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(src - 4 + (8 * stride), stride,
-         row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
-               hev, mask);
+  LD_UB8(src - 4 + (8 * stride), stride, row8, row9, row10, row11, row12, row13,
+         row14, row15);
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
+                      p1, p0, q0, q1, q2, q3);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
@@ -472,10 +480,10 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
  ST4x8_UB(tmp4, tmp5, src, stride);
 }

-static void HFilter16i(uint8_t* src_y, int stride,
-                       int b_limit, int limit, int thresh) {
-  HFilterVertEdge16i(src_y +  4, stride, b_limit, limit, thresh);
-  HFilterVertEdge16i(src_y +  8, stride, b_limit, limit, thresh);
+static void HFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
+                       int thresh) {
+  HFilterVertEdge16i(src_y + 4, stride, b_limit, limit, thresh);
+  HFilterVertEdge16i(src_y + 8, stride, b_limit, limit, thresh);
  HFilterVertEdge16i(src_y + 12, stride, b_limit, limit, thresh);
 }

@@ -496,8 +504,8 @@ static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
  LD_UB8(ptmp_src_v, stride, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
  ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
  ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  p2_d = __msa_copy_s_d((v2i64)p2, 0);
  p1_d = __msa_copy_s_d((v2i64)p1, 0);
@@ -538,13 +546,13 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
  const v16u8 thresh = (v16u8)__msa_fill_b(thresh_in);

  LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(ptmp_src_v, stride,
-         row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  LD_UB8(ptmp_src_v, stride, row8, row9, row10, row11, row12, row13, row14,
+         row15);
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
+                      p1, p0, q0, q1, q2, q3);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
  ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
  ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -562,8 +570,8 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
 }

 static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
-                      uint8_t* WEBP_RESTRICT src_v, int stride,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
+                      int limit_in, int thresh_in) {
  uint64_t p1_d, p0_d, q0_d, q1_d;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
  v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
@@ -578,8 +586,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
  src_v += (5 * stride);
  ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
  ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  p1_d = __msa_copy_s_d((v2i64)p1, 0);
  p0_d = __msa_copy_s_d((v2i64)p0, 0);
@@ -594,8 +602,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
 }

 static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
-                      uint8_t* WEBP_RESTRICT src_v, int stride,
-                      int b_limit_in, int limit_in, int thresh_in) {
+                      uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
+                      int limit_in, int thresh_in) {
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
  v16u8 row9, row10, row11, row12, row13, row14, row15;
@@ -605,13 +613,12 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
  const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);

  LD_UB8(src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(src_v, stride,
-         row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
-                      p3, p2, p1, p0, q0, q1, q2, q3);
-  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
-               hev, mask);
+  LD_UB8(src_v, stride, row8, row9, row10, row11, row12, row13, row14, row15);
+  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
+                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
+                      p1, p0, q0, q1, q2, q3);
+  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
+               mask);
  LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
  ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
@@ -645,11 +652,11 @@ static void SimpleHFilter16(uint8_t* src, int stride, int b_limit_in) {
  uint8_t* ptemp_src = src - 2;

  LD_UB8(ptemp_src, stride, row0, row1, row2, row3, row4, row5, row6, row7);
-  LD_UB8(ptemp_src + 8 * stride, stride,
-         row8, row9, row10, row11, row12, row13, row14, row15);
-  TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
-                      row8, row9, row10, row11, row12, row13, row14, row15,
-                      p1, p0, q0, q1);
+  LD_UB8(ptemp_src + 8 * stride, stride, row8, row9, row10, row11, row12, row13,
+         row14, row15);
+  TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
+                      row9, row10, row11, row12, row13, row14, row15, p1, p0,
+                      q0, q1);
  LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
  LPF_SIMPLE_FILT(p1, p0, q0, q1, mask);
  ILVRL_B2_SH(q0, p0, tmp1, tmp0);
@@ -665,14 +672,14 @@ static void SimpleHFilter16(uint8_t* src, int stride, int b_limit_in) {
 }

 static void SimpleVFilter16i(uint8_t* src_y, int stride, int b_limit_in) {
-  SimpleVFilter16(src_y +  4 * stride, stride, b_limit_in);
-  SimpleVFilter16(src_y +  8 * stride, stride, b_limit_in);
+  SimpleVFilter16(src_y + 4 * stride, stride, b_limit_in);
+  SimpleVFilter16(src_y + 8 * stride, stride, b_limit_in);
  SimpleVFilter16(src_y + 12 * stride, stride, b_limit_in);
 }

 static void SimpleHFilter16i(uint8_t* src_y, int stride, int b_limit_in) {
-  SimpleHFilter16(src_y +  4, stride, b_limit_in);
-  SimpleHFilter16(src_y +  8, stride, b_limit_in);
+  SimpleHFilter16(src_y + 4, stride, b_limit_in);
+  SimpleHFilter16(src_y + 8, stride, b_limit_in);
  SimpleHFilter16(src_y + 12, stride, b_limit_in);
 }

@@ -682,7 +689,7 @@ static void SimpleHFilter16i(uint8_t* src_y, int stride, int b_limit_in) {

 // 4x4

-static void DC4(uint8_t* dst) {   // DC
+static void DC4(uint8_t* dst) {  // DC
  uint32_t dc = 4;
  int i;
  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -694,7 +701,7 @@ static void DC4(uint8_t* dst) {   // DC
 static void TM4(uint8_t* dst) {
  const uint8_t* const ptemp = dst - BPS - 1;
  v8i16 T, d, r0, r1, r2, r3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(ptemp[0 * BPS]);
  const v8i16 L0 = (v8i16)__msa_fill_h(ptemp[1 * BPS]);
  const v8i16 L1 = (v8i16)__msa_fill_h(ptemp[2 * BPS]);
@@ -702,19 +709,19 @@ static void TM4(uint8_t* dst) {
  const v8i16 L3 = (v8i16)__msa_fill_h(ptemp[4 * BPS]);
  const v16u8 T1 = LD_UB(ptemp + 1);

-  T  = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
+  T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
  d = T - TL;
  ADD4(d, L0, d, L1, d, L2, d, L3, r0, r1, r2, r3);
  CLIP_SH4_0_255(r0, r1, r2, r3);
  PCKEV_ST4x4_UB(r0, r1, r2, r3, dst, BPS);
 }

-static void VE4(uint8_t* dst) {    // vertical
+static void VE4(uint8_t* dst) {  // vertical
  const uint8_t* const ptop = dst - BPS - 1;
  const uint32_t val0 = LW(ptop + 0);
  const uint32_t val1 = LW(ptop + 4);
  uint32_t out;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = {0}, B, C, AC, B2, R;

  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
@@ -726,12 +733,12 @@ static void VE4(uint8_t* dst) {    // vertical
  SW4(out, out, out, out, dst, BPS);
 }

-static void RD4(uint8_t* dst) {   // Down-right
+static void RD4(uint8_t* dst) {  // Down-right
  const uint8_t* const ptop = dst - 1 - BPS;
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A, B, C, AC, B2, R, A1 = { 0 };
+  v16u8 A, B, C, AC, B2, R, A1 = {0};

  INSERT_W2_UB(val0, val1, A1);
  A = SLDI_UB(A1, A1, 12);
@@ -754,12 +761,12 @@ static void RD4(uint8_t* dst) {   // Down-right
  SW4(val0, val1, val2, val3, dst, BPS);
 }

-static void LD4(uint8_t* dst) {   // Down-Left
+static void LD4(uint8_t* dst) {  // Down-Left
  const uint8_t* const ptop = dst - BPS;
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = {0}, B, C, AC, B2, R;

  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
@@ -780,7 +787,7 @@ static void LD4(uint8_t* dst) {   // Down-Left

 // 16x16

-static void DC16(uint8_t* dst) {   // DC
+static void DC16(uint8_t* dst) {  // DC
  uint32_t dc = 16;
  int i;
  const v16u8 rtop = LD_UB(dst - BPS);
@@ -799,7 +806,7 @@ static void DC16(uint8_t* dst) {   // DC
 static void TM16(uint8_t* dst) {
  int j;
  v8i16 d1, d2;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
  const v16i8 T = LD_SB(dst - BPS);

@@ -822,13 +829,13 @@ static void TM16(uint8_t* dst) {
  }
 }

-static void VE16(uint8_t* dst) {   // vertical
+static void VE16(uint8_t* dst) {  // vertical
  const v16u8 rtop = LD_UB(dst - BPS);
  ST_UB8(rtop, rtop, rtop, rtop, rtop, rtop, rtop, rtop, dst, BPS);
  ST_UB8(rtop, rtop, rtop, rtop, rtop, rtop, rtop, rtop, dst + 8 * BPS, BPS);
 }

-static void HE16(uint8_t* dst) {   // horizontal
+static void HE16(uint8_t* dst) {  // horizontal
  int j;
  for (j = 16; j > 0; j -= 4) {
    const v16u8 L0 = (v16u8)__msa_fill_b(dst[-1 + 0 * BPS]);
@@ -840,7 +847,7 @@ static void HE16(uint8_t* dst) {   // horizontal
  }
 }

-static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
+static void DC16NoTop(uint8_t* dst) {  // DC with top samples not available
  int j;
  uint32_t dc = 8;
  v16u8 out;
@@ -853,7 +860,7 @@ static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
  ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
 }

-static void DC16NoLeft(uint8_t* dst) {   // DC with left samples not available
+static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
  uint32_t dc = 8;
  const v16u8 rtop = LD_UB(dst - BPS);
  const v8u16 dctop = __msa_hadd_u_h(rtop, rtop);
@@ -865,7 +872,7 @@ static void DC16NoLeft(uint8_t* dst) {   // DC with left samples not available
  ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
 }

-static void DC16NoTopLeft(uint8_t* dst) {   // DC with nothing
+static void DC16NoTopLeft(uint8_t* dst) {  // DC with nothing
  const v16u8 out = (v16u8)__msa_fill_b(0x80);
  ST_UB8(out, out, out, out, out, out, out, out, dst, BPS);
  ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
@@ -873,12 +880,13 @@ static void DC16NoTopLeft(uint8_t* dst) {   // DC with nothing

 // Chroma

-#define STORE8x8(out, dst) do {                 \
-  SD4(out, out, out, out, dst + 0 * BPS, BPS);  \
-  SD4(out, out, out, out, dst + 4 * BPS, BPS);  \
-} while (0)
+#define STORE8x8(out, dst)                       \
+  do {                                           \
+    SD4(out, out, out, out, dst + 0 * BPS, BPS); \
+    SD4(out, out, out, out, dst + 4 * BPS, BPS); \
+  } while (0)

-static void DC8uv(uint8_t* dst) {   // DC
+static void DC8uv(uint8_t* dst) {  // DC
  uint32_t dc = 8;
  int i;
  uint64_t out;
@@ -900,8 +908,8 @@ static void DC8uv(uint8_t* dst) {   // DC
 static void TM8uv(uint8_t* dst) {
  int j;
  const v16i8 T1 = LD_SB(dst - BPS);
-  const v16i8 zero = { 0 };
-  const v8i16 T  = (v8i16)__msa_ilvr_b(zero, T1);
+  const v16i8 zero = {0};
+  const v8i16 T = (v8i16)__msa_ilvr_b(zero, T1);
  const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
  const v8i16 d = T - TL;

@@ -920,13 +928,13 @@ static void TM8uv(uint8_t* dst) {
  }
 }

-static void VE8uv(uint8_t* dst) {   // vertical
+static void VE8uv(uint8_t* dst) {  // vertical
  const v16u8 rtop = LD_UB(dst - BPS);
  const uint64_t out = __msa_copy_s_d((v2i64)rtop, 0);
  STORE8x8(out, dst);
 }

-static void HE8uv(uint8_t* dst) {   // horizontal
+static void HE8uv(uint8_t* dst) {  // horizontal
  int j;
  for (j = 0; j < 8; j += 4) {
    const v16u8 L0 = (v16u8)__msa_fill_b(dst[-1 + 0 * BPS]);
@@ -942,7 +950,7 @@ static void HE8uv(uint8_t* dst) {   // horizontal
  }
 }

-static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft(uint8_t* dst) {  // DC with no left samples
  const uint32_t dc = 4;
  const v16u8 rtop = LD_UB(dst - BPS);
  const v8u16 temp0 = __msa_hadd_u_h(rtop, rtop);
@@ -954,7 +962,7 @@ static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
  STORE8x8(out, dst);
 }

-static void DC8uvNoTop(uint8_t* dst) {   // DC with no top samples
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  uint32_t dc = 4;
  int i;
  uint64_t out;
@@ -968,7 +976,7 @@ static void DC8uvNoTop(uint8_t* dst) {   // DC with no top samples
  STORE8x8(out, dst);
 }

-static void DC8uvNoTopLeft(uint8_t* dst) {   // DC with nothing
+static void DC8uvNoTopLeft(uint8_t* dst) {  // DC with nothing
  const uint64_t out = 0x8080808080808080ULL;
  STORE8x8(out, dst);
 }
@@ -984,16 +992,16 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMSA(void) {
  VP8TransformDC = TransformDC;
  VP8TransformAC3 = TransformAC3;

-  VP8VFilter16  = VFilter16;
-  VP8HFilter16  = HFilter16;
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
  VP8VFilter16i = VFilter16i;
  VP8HFilter16i = HFilter16i;
-  VP8VFilter8  = VFilter8;
-  VP8HFilter8  = HFilter8;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
  VP8VFilter8i = VFilter8i;
  VP8HFilter8i = HFilter8i;
-  VP8SimpleVFilter16  = SimpleVFilter16;
-  VP8SimpleHFilter16  = SimpleHFilter16;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
  VP8SimpleVFilter16i = SimpleVFilter16i;
  VP8SimpleHFilter16i = SimpleHFilter16i;

--- a/src/dsp/dec_neon.c
+++ b/src/dsp/dec_neon.c
@@ -16,8 +16,8 @@

 #if defined(WEBP_USE_NEON)

-#include "src/dsp/neon.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/dsp/neon.h"

 //------------------------------------------------------------------------------
 // NxM Loading functions
@@ -60,10 +60,11 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* const src, int stride,

 #else  // WORK_AROUND_GCC

-#define LOADQ_LANE_32b(VALUE, LANE) do {                             \
-  (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE));   \
-  src += stride;                                                     \
-} while (0)
+#define LOADQ_LANE_32b(VALUE, LANE)                                  \
+  do {                                                               \
+    (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE)); \
+    src += stride;                                                   \
+  } while (0)

 static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
                                      uint8x16_t* const p1,
@@ -111,10 +112,10 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
 #endif  // !WORK_AROUND_GCC

 static WEBP_INLINE void Load8x16_NEON(
-    const uint8_t* const src, int stride,
-    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
-    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
-    uint8x16_t* const q2, uint8x16_t* const q3) {
+    const uint8_t* const src, int stride, uint8x16_t* const p3,
+    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
+    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
+    uint8x16_t* const q3) {
  Load4x16_NEON(src - 2, stride, p3, p2, p1, p0);
  Load4x16_NEON(src + 2, stride, q0, q1, q2, q3);
 }
@@ -131,12 +132,12 @@ static WEBP_INLINE void Load16x4_NEON(const uint8_t* const src, int stride,
 }

 static WEBP_INLINE void Load16x8_NEON(
-    const uint8_t* const src, int stride,
-    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
-    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
-    uint8x16_t* const q2, uint8x16_t* const q3) {
-  Load16x4_NEON(src - 2  * stride, stride, p3, p2, p1, p0);
-  Load16x4_NEON(src + 2  * stride, stride, q0, q1, q2, q3);
+    const uint8_t* const src, int stride, uint8x16_t* const p3,
+    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
+    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
+    uint8x16_t* const q3) {
+  Load16x4_NEON(src - 2 * stride, stride, p3, p2, p1, p0);
+  Load16x4_NEON(src + 2 * stride, stride, q0, q1, q2, q3);
 }

 static WEBP_INLINE void Load8x8x2_NEON(
@@ -220,8 +221,8 @@ static WEBP_INLINE void Load8x8x2T_NEON(

 #endif  // !WORK_AROUND_GCC

-static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v,
-                                      uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v, uint8_t* const dst,
+                                      int stride) {
  vst2_lane_u8(dst + 0 * stride, v, 0);
  vst2_lane_u8(dst + 1 * stride, v, 1);
  vst2_lane_u8(dst + 2 * stride, v, 2);
@@ -244,8 +245,8 @@ static WEBP_INLINE void Store2x16_NEON(const uint8x16_t p0, const uint8x16_t q0,
 }

 #if !defined(WORK_AROUND_GCC)
-static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v,
-                                      uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v, uint8_t* const dst,
+                                      int stride) {
  vst4_lane_u8(dst + 0 * stride, v, 0);
  vst4_lane_u8(dst + 1 * stride, v, 1);
  vst4_lane_u8(dst + 2 * stride, v, 2);
@@ -260,12 +261,10 @@ static WEBP_INLINE void Store4x16_NEON(const uint8x16_t p1, const uint8x16_t p0,
                                       const uint8x16_t q0, const uint8x16_t q1,
                                       uint8_t* const dst, int stride) {
  uint8x8x4_t lo, hi;
-  INIT_VECTOR4(lo,
-               vget_low_u8(p1), vget_low_u8(p0),
-               vget_low_u8(q0), vget_low_u8(q1));
-  INIT_VECTOR4(hi,
-               vget_high_u8(p1), vget_high_u8(p0),
-               vget_high_u8(q0), vget_high_u8(q1));
+  INIT_VECTOR4(lo, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
+               vget_low_u8(q1));
+  INIT_VECTOR4(hi, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
+               vget_high_u8(q1));
  Store4x8_NEON(lo, dst - 2 + 0 * stride, stride);
  Store4x8_NEON(hi, dst - 2 + 8 * stride, stride);
 }
@@ -285,22 +284,20 @@ static WEBP_INLINE void Store16x4_NEON(const uint8x16_t p1, const uint8x16_t p0,
 }

 static WEBP_INLINE void Store8x2x2_NEON(const uint8x16_t p0,
-                                        const uint8x16_t q0,
-                                        uint8_t* const u, uint8_t* const v,
-                                        int stride) {
+                                        const uint8x16_t q0, uint8_t* const u,
+                                        uint8_t* const v, int stride) {
  // p0 and q0 contain the u+v samples packed in low/high halves.
  vst1_u8(u - stride, vget_low_u8(p0));
-  vst1_u8(u,          vget_low_u8(q0));
+  vst1_u8(u, vget_low_u8(q0));
  vst1_u8(v - stride, vget_high_u8(p0));
-  vst1_u8(v,          vget_high_u8(q0));
+  vst1_u8(v, vget_high_u8(q0));
 }

 static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
                                        const uint8x16_t p0,
                                        const uint8x16_t q0,
-                                        const uint8x16_t q1,
-                                        uint8_t* const u, uint8_t* const v,
-                                        int stride) {
+                                        const uint8x16_t q1, uint8_t* const u,
+                                        uint8_t* const v, int stride) {
  // The p1...q1 registers contain the u+v samples packed in low/high halves.
  Store8x2x2_NEON(p1, p0, u - stride, v - stride, stride);
  Store8x2x2_NEON(q0, q1, u + stride, v + stride, stride);
@@ -308,16 +305,17 @@ static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,

 #if !defined(WORK_AROUND_GCC)

-#define STORE6_LANE(DST, VAL0, VAL1, LANE) do {   \
-  vst3_lane_u8((DST) - 3, (VAL0), (LANE));        \
-  vst3_lane_u8((DST) + 0, (VAL1), (LANE));        \
-  (DST) += stride;                                \
-} while (0)
+#define STORE6_LANE(DST, VAL0, VAL1, LANE)   \
+  do {                                       \
+    vst3_lane_u8((DST) - 3, (VAL0), (LANE)); \
+    vst3_lane_u8((DST) + 0, (VAL1), (LANE)); \
+    (DST) += stride;                         \
+  } while (0)

 static WEBP_INLINE void Store6x8x2_NEON(
    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
-    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
-    uint8_t* u, uint8_t* v, int stride) {
+    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2, uint8_t* u,
+    uint8_t* v, int stride) {
  uint8x8x3_t u0, u1, v0, v1;
  INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
  INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
@@ -345,16 +343,13 @@ static WEBP_INLINE void Store6x8x2_NEON(
 static WEBP_INLINE void Store4x8x2_NEON(const uint8x16_t p1,
                                        const uint8x16_t p0,
                                        const uint8x16_t q0,
-                                        const uint8x16_t q1,
-                                        uint8_t* const u, uint8_t* const v,
-                                        int stride) {
+                                        const uint8x16_t q1, uint8_t* const u,
+                                        uint8_t* const v, int stride) {
  uint8x8x4_t u0, v0;
-  INIT_VECTOR4(u0,
-               vget_low_u8(p1), vget_low_u8(p0),
-               vget_low_u8(q0), vget_low_u8(q1));
-  INIT_VECTOR4(v0,
-               vget_high_u8(p1), vget_high_u8(p0),
-               vget_high_u8(q0), vget_high_u8(q1));
+  INIT_VECTOR4(u0, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
+               vget_low_u8(q1));
+  INIT_VECTOR4(v0, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
+               vget_high_u8(q1));
  vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
  vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
  vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
@@ -397,8 +392,7 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
 }

 static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
-                                    const int16x8_t row23,
-                                    uint8_t* const dst) {
+                                    const int16x8_t row23, uint8_t* const dst) {
  uint32x2_t dst01 = vdup_n_u32(0);
  uint32x2_t dst23 = vdup_n_u32(0);

@@ -449,26 +443,25 @@ static uint8x16_t FlipSignBack_NEON(const int8x16_t v) {

 static int8x16_t GetBaseDelta_NEON(const int8x16_t p1, const int8x16_t p0,
                                   const int8x16_t q0, const int8x16_t q1) {
-  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
-  const int8x16_t p1_q1 = vqsubq_s8(p1, q1);      // (p1-q1)
-  const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);   // (p1-q1) + 1 * (q0 - p0)
-  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // (p1-q1) + 2 * (q0 - p0)
-  const int8x16_t s3 = vqaddq_s8(q0_p0, s2);      // (p1-q1) + 3 * (q0 - p0)
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);     // (q0-p0)
+  const int8x16_t p1_q1 = vqsubq_s8(p1, q1);     // (p1-q1)
+  const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);  // (p1-q1) + 1 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);     // (p1-q1) + 2 * (q0 - p0)
+  const int8x16_t s3 = vqaddq_s8(q0_p0, s2);     // (p1-q1) + 3 * (q0 - p0)
  return s3;
 }

 static int8x16_t GetBaseDelta0_NEON(const int8x16_t p0, const int8x16_t q0) {
-  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
-  const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);   // 2 * (q0 - p0)
-  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // 3 * (q0 - p0)
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);     // (q0-p0)
+  const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);  // 2 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);     // 3 * (q0 - p0)
  return s2;
 }

 //------------------------------------------------------------------------------

 static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
-                                    const int8x16_t delta,
-                                    int8x16_t* const op0,
+                                    const int8x16_t delta, int8x16_t* const op0,
                                    int8x16_t* const oq0) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
@@ -483,8 +476,8 @@ static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
 #if defined(WEBP_USE_INTRINSICS)

 static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
-                              const int8x16_t delta,
-                              uint8x16_t* const op0, uint8x16_t* const oq0) {
+                              const int8x16_t delta, uint8x16_t* const op0,
+                              uint8x16_t* const oq0) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
  const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
@@ -499,8 +492,8 @@ static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,

 static void DoFilter2_NEON(const uint8x16_t p1, const uint8x16_t p0,
                           const uint8x16_t q0, const uint8x16_t q1,
-                           const uint8x16_t mask,
-                           uint8x16_t* const op0, uint8x16_t* const oq0) {
+                           const uint8x16_t mask, uint8x16_t* const op0,
+                           uint8x16_t* const oq0) {
  const int8x16_t p1s = FlipSign_NEON(p1);
  const int8x16_t p0s = FlipSign_NEON(p0);
  const int8x16_t q0s = FlipSign_NEON(q0);
@@ -533,6 +526,7 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
 #else

 // Load/Store vertical edge
+// clang-format off
 #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
  "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
  "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
@@ -600,30 +594,30 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
  "vand       q9, q9, q11                 \n"  /* apply filter mask */         \
  DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
  FLIP_SIGN_BIT2(p0, q0, q10)
+// clang-format on

 static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
-  __asm__ volatile (
-    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+  __asm__ volatile(
+      "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride

-    "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
-    "vld1.u8    {q2}, [%[p]], %[stride]        \n"  // p0
-    "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
-    "vld1.u8    {q12}, [%[p]]                  \n"  // q1
+      "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
+      "vld1.u8    {q2}, [%[p]], %[stride]        \n"  // p0
+      "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
+      "vld1.u8    {q12}, [%[p]]                  \n"  // q1

-    DO_FILTER2(q1, q2, q3, q12, %[thresh])
+      DO_FILTER2(q1, q2, q3, q12, %[thresh])  // stringified: keep %[ unsplit

-    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+      "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride

-    "vst1.u8    {q2}, [%[p]], %[stride]        \n"  // store op0
-    "vst1.u8    {q3}, [%[p]]                   \n"  // store oq0
-    : [p] "+r"(p)
-    : [stride] "r"(stride), [thresh] "r"(thresh)
-    : "memory", QRegs
-  );
+      "vst1.u8    {q2}, [%[p]], %[stride]        \n"  // store op0
+      "vst1.u8    {q3}, [%[p]]                   \n"  // store oq0
+      : [p] "+r"(p)
+      : [stride] "r"(stride), [thresh] "r"(thresh)
+      : "memory", QRegs);
 }

 static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
-  __asm__ volatile (
+  __asm__ volatile(
    "sub        r4, %[p], #2                   \n"  // base1 = p - 2
    "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
    "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
@@ -651,7 +645,7 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
 #undef LOAD8x4
 #undef STORE8x2

-#endif    // WEBP_USE_INTRINSICS
+#endif  // WEBP_USE_INTRINSICS

 static void SimpleVFilter16i_NEON(uint8_t* p, int stride, int thresh) {
  uint32_t k;
@@ -708,31 +702,29 @@ static uint8x16_t NeedsFilter2_NEON(const uint8x16_t p3, const uint8x16_t p2,

 //  4-points filter

-static void ApplyFilter4_NEON(
-    const int8x16_t p1, const int8x16_t p0,
-    const int8x16_t q0, const int8x16_t q1,
-    const int8x16_t delta0,
-    uint8x16_t* const op1, uint8x16_t* const op0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+static void ApplyFilter4_NEON(const int8x16_t p1, const int8x16_t p0,
+                              const int8x16_t q0, const int8x16_t q1,
+                              const int8x16_t delta0, uint8x16_t* const op1,
+                              uint8x16_t* const op0, uint8x16_t* const oq0,
+                              uint8x16_t* const oq1) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
  const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);
  const int8x16_t delta2 = vqaddq_s8(delta0, kCst3);
  const int8x16_t a1 = vshrq_n_s8(delta1, 3);
  const int8x16_t a2 = vshrq_n_s8(delta2, 3);
-  const int8x16_t a3 = vrshrq_n_s8(a1, 1);   // a3 = (a1 + 1) >> 1
+  const int8x16_t a3 = vrshrq_n_s8(a1, 1);      // a3 = (a1 + 1) >> 1
  *op0 = FlipSignBack_NEON(vqaddq_s8(p0, a2));  // clip(p0 + a2)
  *oq0 = FlipSignBack_NEON(vqsubq_s8(q0, a1));  // clip(q0 - a1)
  *op1 = FlipSignBack_NEON(vqaddq_s8(p1, a3));  // clip(p1 + a3)
  *oq1 = FlipSignBack_NEON(vqsubq_s8(q1, a3));  // clip(q1 - a3)
 }

-static void DoFilter4_NEON(
-    const uint8x16_t p1, const uint8x16_t p0,
-    const uint8x16_t q0, const uint8x16_t q1,
-    const uint8x16_t mask, const uint8x16_t hev_mask,
-    uint8x16_t* const op1, uint8x16_t* const op0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+static void DoFilter4_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                           const uint8x16_t q0, const uint8x16_t q1,
+                           const uint8x16_t mask, const uint8x16_t hev_mask,
+                           uint8x16_t* const op1, uint8x16_t* const op0,
+                           uint8x16_t* const oq0, uint8x16_t* const oq1) {
  // This is a fused version of DoFilter2() calling ApplyFilter2 directly
  const int8x16_t p1s = FlipSign_NEON(p1);
  int8x16_t p0s = FlipSign_NEON(p0);
@@ -761,12 +753,13 @@ static void DoFilter4_NEON(

 //  6-points filter

-static void ApplyFilter6_NEON(
-    const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
-    const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
-    const int8x16_t delta,
-    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+static void ApplyFilter6_NEON(const int8x16_t p2, const int8x16_t p1,
+                              const int8x16_t p0, const int8x16_t q0,
+                              const int8x16_t q1, const int8x16_t q2,
+                              const int8x16_t delta, uint8x16_t* const op2,
+                              uint8x16_t* const op1, uint8x16_t* const op0,
+                              uint8x16_t* const oq0, uint8x16_t* const oq1,
+                              uint8x16_t* const oq2) {
  // We have to compute: X = (9*a+63) >> 7, Y = (18*a+63)>>7, Z = (27*a+63) >> 7
  // Turns out, there's a common sub-expression S=9 * a - 1 that can be used
  // with the special vqrshrn_n_s16 rounding-shift-and-narrow instruction:
@@ -778,13 +771,13 @@ static void ApplyFilter6_NEON(
  const int8x8_t kCst18 = vdup_n_s8(18);
  const int16x8_t S_lo = vmlal_s8(kCstm1, kCst9, delta_lo);  // S = 9 * a - 1
  const int16x8_t S_hi = vmlal_s8(kCstm1, kCst9, delta_hi);
-  const int16x8_t Z_lo = vmlal_s8(S_lo, kCst18, delta_lo);   // S + 18 * a
+  const int16x8_t Z_lo = vmlal_s8(S_lo, kCst18, delta_lo);  // S + 18 * a
  const int16x8_t Z_hi = vmlal_s8(S_hi, kCst18, delta_hi);
-  const int8x8_t a3_lo = vqrshrn_n_s16(S_lo, 7);   // (9 * a + 63) >> 7
+  const int8x8_t a3_lo = vqrshrn_n_s16(S_lo, 7);  // (9 * a + 63) >> 7
  const int8x8_t a3_hi = vqrshrn_n_s16(S_hi, 7);
-  const int8x8_t a2_lo = vqrshrn_n_s16(S_lo, 6);   // (9 * a + 31) >> 6
+  const int8x8_t a2_lo = vqrshrn_n_s16(S_lo, 6);  // (9 * a + 31) >> 6
  const int8x8_t a2_hi = vqrshrn_n_s16(S_hi, 6);
-  const int8x8_t a1_lo = vqrshrn_n_s16(Z_lo, 7);   // (27 * a + 63) >> 7
+  const int8x8_t a1_lo = vqrshrn_n_s16(Z_lo, 7);  // (27 * a + 63) >> 7
  const int8x8_t a1_hi = vqrshrn_n_s16(Z_hi, 7);
  const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
  const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
@@ -798,12 +791,13 @@ static void ApplyFilter6_NEON(
  *op2 = FlipSignBack_NEON(vqaddq_s8(p2, a3));  // clip(p2 + a3)
 }

-static void DoFilter6_NEON(
-    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
-    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
-    const uint8x16_t mask, const uint8x16_t hev_mask,
-    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
-    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+static void DoFilter6_NEON(const uint8x16_t p2, const uint8x16_t p1,
+                           const uint8x16_t p0, const uint8x16_t q0,
+                           const uint8x16_t q1, const uint8x16_t q2,
+                           const uint8x16_t mask, const uint8x16_t hev_mask,
+                           uint8x16_t* const op2, uint8x16_t* const op1,
+                           uint8x16_t* const op0, uint8x16_t* const oq0,
+                           uint8x16_t* const oq1, uint8x16_t* const oq2) {
  // This is a fused version of DoFilter2() calling ApplyFilter2 directly
  const int8x16_t p2s = FlipSign_NEON(p2);
  const int8x16_t p1s = FlipSign_NEON(p1);
@@ -827,41 +821,41 @@ static void DoFilter6_NEON(
    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
    const int8x16_t complex_lf_delta =
        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
-    ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
-                      op2, op1, op0, oq0, oq1, oq2);
+    ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta, op2, op1,
+                      op0, oq0, oq1, oq2);
  }
 }

 // on macroblock edges

-static void VFilter16_NEON(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void VFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load16x8_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
+                   &oq0, &oq1, &oq2);
    Store16x2_NEON(op2, op1, p - 2 * stride, stride);
    Store16x2_NEON(op0, oq0, p + 0 * stride, stride);
    Store16x2_NEON(oq1, oq2, p + 2 * stride, stride);
  }
 }

-static void HFilter16_NEON(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void HFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x16_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
+                   &oq0, &oq1, &oq2);
    Store2x16_NEON(op2, op1, p - 2, stride);
    Store2x16_NEON(op0, oq0, p + 0, stride);
    Store2x16_NEON(oq1, oq2, p + 2, stride);
@@ -869,15 +863,15 @@ static void HFilter16_NEON(uint8_t* p, int stride,
 }

 // on three inner edges
-static void VFilter16i_NEON(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
+                            int hev_thresh) {
  uint32_t k;
  uint8x16_t p3, p2, p1, p0;
-  Load16x4_NEON(p + 2  * stride, stride, &p3, &p2, &p1, &p0);
+  Load16x4_NEON(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
  for (k = 3; k != 0; --k) {
    uint8x16_t q0, q1, q2, q3;
    p += 4 * stride;
-    Load16x4_NEON(p + 2  * stride, stride, &q0, &q1, &q2, &q3);
+    Load16x4_NEON(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
    {
      const uint8x16_t mask =
          NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
@@ -893,8 +887,8 @@ static void VFilter16i_NEON(uint8_t* p, int stride,
 }

 #if !defined(WORK_AROUND_GCC)
-static void HFilter16i_NEON(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
+                            int hev_thresh) {
  uint32_t k;
  uint8x16_t p3, p2, p1, p0;
  Load4x16_NEON(p + 2, stride, &p3, &p2, &p1, &p0);
@@ -921,27 +915,27 @@ static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
+                   &oq0, &oq1, &oq2);
    Store8x2x2_NEON(op2, op1, u - 2 * stride, v - 2 * stride, stride);
    Store8x2x2_NEON(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
    Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
  }
 }
 static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  u += 4 * stride;
  v += 4 * stride;
  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op1, op0, oq0, oq1;
    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -955,26 +949,26 @@ static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
+                   &oq0, &oq1, &oq2);
    Store6x8x2_NEON(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
  }
 }

 static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  u += 4;
  v += 4;
  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
-    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
-                                              ithresh, thresh);
+    const uint8x16_t mask =
+        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op1, op0, oq0, oq1;
    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -1012,8 +1006,8 @@ static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
                                          int16x8x2_t* const out) {
  // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
  // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
-  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
-                                                  // b0 d0 b1 d1 b2 d2 ...
+  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);  // a0 c0 a1 c1 a2 c2 ...
+                                                 // b0 d0 b1 d1 b2 d2 ...
  *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
 }

@@ -1028,17 +1022,17 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
-                                vget_low_s16(rows->val[1]));   // in0 + in8
+                                vget_low_s16(rows->val[1]));  // in0 + in8
  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
-                                vget_low_s16(rows->val[1]));   // in0 - in8
+                                vget_low_s16(rows->val[1]));  // in0 - in8
  // c = kC2 * in4 - kC1 * in12
  // d = kC1 * in4 + kC2 * in12
  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
-  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
-  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
-  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
-  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t D0 = vcombine_s16(a, b);     // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);     // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);     // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);  // a-d | b-c
  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
  Transpose8x2_NEON(E0, E1, rows);
 }
@@ -1058,135 +1052,135 @@ static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
                              uint8_t* WEBP_RESTRICT dst) {
  const int kBPS = BPS;
  // kC1, kC2. Padded because vld1.16 loads 8 bytes
-  const int16_t constants[4] = { kC1, kC2, 0, 0 };
+  const int16_t constants[4] = {kC1, kC2, 0, 0};
  /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
-  __asm__ volatile (
-    "vld1.16         {q1, q2}, [%[in]]           \n"
-    "vld1.16         {d0}, [%[constants]]        \n"
+  __asm__ volatile(
+      "vld1.16         {q1, q2}, [%[in]]           \n"
+      "vld1.16         {d0}, [%[constants]]        \n"

-    /* d2: in[0]
-     * d3: in[8]
-     * d4: in[4]
-     * d5: in[12]
-     */
-    "vswp            d3, d4                      \n"
+      /* d2: in[0]
+       * d3: in[8]
+       * d4: in[4]
+       * d5: in[12]
+       */
+      "vswp            d3, d4                      \n"

-    /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
-     * q9 = {in[4], in[12]} * kC2 >> 16
-     */
-    "vqdmulh.s16     q8, q2, d0[0]               \n"
-    "vqdmulh.s16     q9, q2, d0[1]               \n"
+      /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
+       * q9 = {in[4], in[12]} * kC2 >> 16
+       */
+      "vqdmulh.s16     q8, q2, d0[0]               \n"
+      "vqdmulh.s16     q9, q2, d0[1]               \n"

-    /* d22 = a = in[0] + in[8]
-     * d23 = b = in[0] - in[8]
-     */
-    "vqadd.s16       d22, d2, d3                 \n"
-    "vqsub.s16       d23, d2, d3                 \n"
+      /* d22 = a = in[0] + in[8]
+       * d23 = b = in[0] - in[8]
+       */
+      "vqadd.s16       d22, d2, d3                 \n"
+      "vqsub.s16       d23, d2, d3                 \n"

-    /* The multiplication should be x * kC1 >> 16
-     * However, with vqdmulh we get x * kC1 * 2 >> 16
-     * (multiply, double, return high half)
-     * We avoided this in kC2 by pre-shifting the constant.
-     * q8 = in[4]/[12] * kC1 >> 16
-     */
-    "vshr.s16        q8, q8, #1                  \n"
+      /* The multiplication should be x * kC1 >> 16
+       * However, with vqdmulh we get x * kC1 * 2 >> 16
+       * (multiply, double, return high half)
+       * We avoided this in kC2 by pre-shifting the constant.
+       * q8 = in[4]/[12] * kC1 >> 16
+       */
+      "vshr.s16        q8, q8, #1                  \n"

-    /* Add {in[4], in[12]} back after the multiplication. This is handled by
-     * adding 1 << 16 to kC1 in the libwebp C code.
-     */
-    "vqadd.s16       q8, q2, q8                  \n"
+      /* Add {in[4], in[12]} back after the multiplication. This is handled by
+       * adding 1 << 16 to kC1 in the libwebp C code.
+       */
+      "vqadd.s16       q8, q2, q8                  \n"

-    /* d20 = c = in[4]*kC2 - in[12]*kC1
-     * d21 = d = in[4]*kC1 + in[12]*kC2
-     */
-    "vqsub.s16       d20, d18, d17               \n"
-    "vqadd.s16       d21, d19, d16               \n"
+      /* d20 = c = in[4]*kC2 - in[12]*kC1
+       * d21 = d = in[4]*kC1 + in[12]*kC2
+       */
+      "vqsub.s16       d20, d18, d17               \n"
+      "vqadd.s16       d21, d19, d16               \n"

-    /* d2 = tmp[0] = a + d
-     * d3 = tmp[1] = b + c
-     * d4 = tmp[2] = b - c
-     * d5 = tmp[3] = a - d
-     */
-    "vqadd.s16       d2, d22, d21                \n"
-    "vqadd.s16       d3, d23, d20                \n"
-    "vqsub.s16       d4, d23, d20                \n"
-    "vqsub.s16       d5, d22, d21                \n"
+      /* d2 = tmp[0] = a + d
+       * d3 = tmp[1] = b + c
+       * d4 = tmp[2] = b - c
+       * d5 = tmp[3] = a - d
+       */
+      "vqadd.s16       d2, d22, d21                \n"
+      "vqadd.s16       d3, d23, d20                \n"
+      "vqsub.s16       d4, d23, d20                \n"
+      "vqsub.s16       d5, d22, d21                \n"

-    "vzip.16         q1, q2                      \n"
-    "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"

-    "vswp            d3, d4                      \n"
+      "vswp            d3, d4                      \n"

-    /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
-     * q9 = {tmp[4], tmp[12]} * kC2 >> 16
-     */
-    "vqdmulh.s16     q8, q2, d0[0]               \n"
-    "vqdmulh.s16     q9, q2, d0[1]               \n"
+      /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+       * q9 = {tmp[4], tmp[12]} * kC2 >> 16
+       */
+      "vqdmulh.s16     q8, q2, d0[0]               \n"
+      "vqdmulh.s16     q9, q2, d0[1]               \n"

-    /* d22 = a = tmp[0] + tmp[8]
-     * d23 = b = tmp[0] - tmp[8]
-     */
-    "vqadd.s16       d22, d2, d3                 \n"
-    "vqsub.s16       d23, d2, d3                 \n"
+      /* d22 = a = tmp[0] + tmp[8]
+       * d23 = b = tmp[0] - tmp[8]
+       */
+      "vqadd.s16       d22, d2, d3                 \n"
+      "vqsub.s16       d23, d2, d3                 \n"

-    /* See long winded explanations prior */
-    "vshr.s16        q8, q8, #1                  \n"
-    "vqadd.s16       q8, q2, q8                  \n"
+      /* See long winded explanations prior */
+      "vshr.s16        q8, q8, #1                  \n"
+      "vqadd.s16       q8, q2, q8                  \n"

-    /* d20 = c = in[4]*kC2 - in[12]*kC1
-     * d21 = d = in[4]*kC1 + in[12]*kC2
-     */
-    "vqsub.s16       d20, d18, d17               \n"
-    "vqadd.s16       d21, d19, d16               \n"
+      /* d20 = c = in[4]*kC2 - in[12]*kC1
+       * d21 = d = in[4]*kC1 + in[12]*kC2
+       */
+      "vqsub.s16       d20, d18, d17               \n"
+      "vqadd.s16       d21, d19, d16               \n"

-    /* d2 = tmp[0] = a + d
-     * d3 = tmp[1] = b + c
-     * d4 = tmp[2] = b - c
-     * d5 = tmp[3] = a - d
-     */
-    "vqadd.s16       d2, d22, d21                \n"
-    "vqadd.s16       d3, d23, d20                \n"
-    "vqsub.s16       d4, d23, d20                \n"
-    "vqsub.s16       d5, d22, d21                \n"
+      /* d2 = tmp[0] = a + d
+       * d3 = tmp[1] = b + c
+       * d4 = tmp[2] = b - c
+       * d5 = tmp[3] = a - d
+       */
+      "vqadd.s16       d2, d22, d21                \n"
+      "vqadd.s16       d3, d23, d20                \n"
+      "vqsub.s16       d4, d23, d20                \n"
+      "vqsub.s16       d5, d22, d21                \n"

-    "vld1.32         d6[0], [%[dst]], %[kBPS]    \n"
-    "vld1.32         d6[1], [%[dst]], %[kBPS]    \n"
-    "vld1.32         d7[0], [%[dst]], %[kBPS]    \n"
-    "vld1.32         d7[1], [%[dst]], %[kBPS]    \n"
+      "vld1.32         d6[0], [%[dst]], %[kBPS]    \n"
+      "vld1.32         d6[1], [%[dst]], %[kBPS]    \n"
+      "vld1.32         d7[0], [%[dst]], %[kBPS]    \n"
+      "vld1.32         d7[1], [%[dst]], %[kBPS]    \n"

-    "sub         %[dst], %[dst], %[kBPS], lsl #2 \n"
+      "sub         %[dst], %[dst], %[kBPS], lsl #2 \n"

-    /* (val) + 4 >> 3 */
-    "vrshr.s16       d2, d2, #3                  \n"
-    "vrshr.s16       d3, d3, #3                  \n"
-    "vrshr.s16       d4, d4, #3                  \n"
-    "vrshr.s16       d5, d5, #3                  \n"
+      /* (val) + 4 >> 3 */
+      "vrshr.s16       d2, d2, #3                  \n"
+      "vrshr.s16       d3, d3, #3                  \n"
+      "vrshr.s16       d4, d4, #3                  \n"
+      "vrshr.s16       d5, d5, #3                  \n"

-    "vzip.16         q1, q2                      \n"
-    "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"

-    /* Must accumulate before saturating */
-    "vmovl.u8        q8, d6                      \n"
-    "vmovl.u8        q9, d7                      \n"
+      /* Must accumulate before saturating */
+      "vmovl.u8        q8, d6                      \n"
+      "vmovl.u8        q9, d7                      \n"

-    "vqadd.s16       q1, q1, q8                  \n"
-    "vqadd.s16       q2, q2, q9                  \n"
+      "vqadd.s16       q1, q1, q8                  \n"
+      "vqadd.s16       q2, q2, q9                  \n"

-    "vqmovun.s16     d0, q1                      \n"
-    "vqmovun.s16     d1, q2                      \n"
+      "vqmovun.s16     d0, q1                      \n"
+      "vqmovun.s16     d1, q2                      \n"

-    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d1[1], [%[dst]]             \n"
+      "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d1[1], [%[dst]]             \n"

-    : [in] "+r"(in), [dst] "+r"(dst)  /* modified registers */
-    : [kBPS] "r"(kBPS), [constants] "r"(constants)  /* constants */
-    : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11"  /* clobbered */
+      : [in] "+r"(in), [dst] "+r"(dst)               /* modified registers */
+      : [kBPS] "r"(kBPS), [constants] "r"(constants) /* constants */
+      : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */
  );
 }

-#endif    // WEBP_USE_INTRINSICS
+#endif  // WEBP_USE_INTRINSICS

 static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
                              uint8_t* WEBP_RESTRICT dst, int do_two) {
@@ -1204,12 +1198,17 @@ static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,

 //------------------------------------------------------------------------------

-#define STORE_WHT(dst, col, rows) do {                  \
-  *dst = vgetq_lane_s32(rows.val[0], col); (dst) += 16; \
-  *dst = vgetq_lane_s32(rows.val[1], col); (dst) += 16; \
-  *dst = vgetq_lane_s32(rows.val[2], col); (dst) += 16; \
-  *dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
-} while (0)
+#define STORE_WHT(dst, col, rows)            \
+  do {                                       \
+    *dst = vgetq_lane_s32(rows.val[0], col); \
+    (dst) += 16;                             \
+    *dst = vgetq_lane_s32(rows.val[1], col); \
+    (dst) += 16;                             \
+    *dst = vgetq_lane_s32(rows.val[2], col); \
+    (dst) += 16;                             \
+    *dst = vgetq_lane_s32(rows.val[3], col); \
+    (dst) += 16;                             \
+  } while (0)

 static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
                              int16_t* WEBP_RESTRICT out) {
@@ -1270,10 +1269,9 @@ static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
  const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
-  const uint64_t cd = (uint64_t)( d1 & 0xffff) <<  0 |
-                      (uint64_t)( c1 & 0xffff) << 16 |
-                      (uint64_t)(-c1 & 0xffff) << 32 |
-                      (uint64_t)(-d1 & 0xffff) << 48;
+  const uint64_t cd =
+      (uint64_t)(d1 & 0xffff) << 0 | (uint64_t)(c1 & 0xffff) << 16 |
+      (uint64_t)(-c1 & 0xffff) << 32 | (uint64_t)(-d1 & 0xffff) << 48;
  const int16x4_t CD = vcreate_s16(cd);
  const int16x4_t B = vqadd_s16(A, CD);
  const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
@@ -1284,9 +1282,9 @@ static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------
 // 4x4

-static void DC4_NEON(uint8_t* dst) {    // DC
+static void DC4_NEON(uint8_t* dst) {       // DC
  const uint8x8_t A = vld1_u8(dst - BPS);  // top row
-  const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
+  const uint16x4_t p0 = vpaddl_u8(A);      // cascading summation of the top
  const uint16x4_t p1 = vpadd_u16(p0, p0);
  const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1);
  const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1);
@@ -1307,8 +1305,8 @@ static void DC4_NEON(uint8_t* dst) {    // DC
 // TrueMotion (4x4 + 8x8)
 static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
-  const uint8x8_t T = vld1_u8(dst - BPS);  // top row 'A[0..3]'
-  const uint16x8_t d = vsubl_u8(T, TL);  // A[c] - A[-1]
+  const uint8x8_t T = vld1_u8(dst - BPS);           // top row 'A[0..3]'
+  const uint16x8_t d = vsubl_u8(T, TL);             // A[c] - A[-1]
  int y;
  for (y = 0; y < size; y += 4) {
    // left edge
@@ -1343,7 +1341,7 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {

 static void TM4_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 4); }

-static void VE4_NEON(uint8_t* dst) {    // vertical
+static void VE4_NEON(uint8_t* dst) {  // vertical
  // NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
  const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1));  // top row
  const uint64x1_t A1 = vshr_n_u64(A0, 8);
@@ -1359,7 +1357,7 @@ static void VE4_NEON(uint8_t* dst) {    // vertical
  }
 }

-static void RD4_NEON(uint8_t* dst) {   // Down-right
+static void RD4_NEON(uint8_t* dst) {  // Down-right
  const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1);
  const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
  const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
@@ -1388,7 +1386,7 @@ static void RD4_NEON(uint8_t* dst) {   // Down-right
  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
 }

-static void LD4_NEON(uint8_t* dst) {    // Down-left
+static void LD4_NEON(uint8_t* dst) {  // Down-left
  // Note using the same shift trick as VE4() is slower here.
  const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0);
  const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1);
@@ -1410,7 +1408,7 @@ static void LD4_NEON(uint8_t* dst) {    // Down-left
 //------------------------------------------------------------------------------
 // Chroma

-static void VE8uv_NEON(uint8_t* dst) {    // vertical
+static void VE8uv_NEON(uint8_t* dst) {  // vertical
  const uint8x8_t top = vld1_u8(dst - BPS);
  int j;
  for (j = 0; j < 8; ++j) {
@@ -1418,7 +1416,7 @@ static void VE8uv_NEON(uint8_t* dst) {    // vertical
  }
 }

-static void HE8uv_NEON(uint8_t* dst) {    // horizontal
+static void HE8uv_NEON(uint8_t* dst) {  // horizontal
  int j;
  for (j = 0; j < 8; ++j) {
    const uint8x8_t left = vld1_dup_u8(dst - 1);
@@ -1493,7 +1491,7 @@ static void TM8uv_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 8); }
 //------------------------------------------------------------------------------
 // 16x16

-static void VE16_NEON(uint8_t* dst) {     // vertical
+static void VE16_NEON(uint8_t* dst) {  // vertical
  const uint8x16_t top = vld1q_u8(dst - BPS);
  int j;
  for (j = 0; j < 16; ++j) {
@@ -1501,7 +1499,7 @@ static void VE16_NEON(uint8_t* dst) {     // vertical
  }
 }

-static void HE16_NEON(uint8_t* dst) {     // horizontal
+static void HE16_NEON(uint8_t* dst) {  // horizontal
  int j;
  for (j = 0; j < 16; ++j) {
    const uint8x16_t left = vld1q_dup_u8(dst - 1);
@@ -1579,7 +1577,7 @@ static void DC16NoTopLeft_NEON(uint8_t* dst) { DC16_NEON(dst, 0, 0); }

 static void TM16_NEON(uint8_t* dst) {
  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
-  const uint8x16_t T = vld1q_u8(dst - BPS);  // top row 'A[0..15]'
+  const uint8x16_t T = vld1q_u8(dst - BPS);         // top row 'A[0..15]'
  // A[c] - A[-1]
  const uint16x8_t d_lo = vsubl_u8(vget_low_u8(T), TL);
  const uint16x8_t d_hi = vsubl_u8(vget_high_u8(T), TL);
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -19,7 +19,7 @@
 // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
 // one it seems => disable it by default. Uncomment the following to enable:
 #if !defined(USE_TRANSFORM_AC3)
-#define USE_TRANSFORM_AC3 0   // ALTERNATE_CODE
+#define USE_TRANSFORM_AC3 0  // ALTERNATE_CODE
 #endif

 #include <emmintrin.h>
@@ -119,8 +119,8 @@ static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
    // multiplications.
    const __m128i four = _mm_set1_epi16(4);
    const __m128i dc = _mm_add_epi16(T0, four);
-    const __m128i a =  _mm_add_epi16(dc, T2);
-    const __m128i b =  _mm_sub_epi16(dc, T2);
+    const __m128i a = _mm_add_epi16(dc, T2);
+    const __m128i b = _mm_sub_epi16(dc, T2);
    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
@@ -242,15 +242,14 @@ static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
 }

-#endif   // USE_TRANSFORM_AC3
+#endif  // USE_TRANSFORM_AC3

 //------------------------------------------------------------------------------
 // Loop Filter (Paragraph 15)

 // Compute abs(p - q) = subs(p - q) OR subs(q - p)
-#define MM_ABS(p, q)  _mm_or_si128(                                            \
-    _mm_subs_epu8((q), (p)),                                                   \
-    _mm_subs_epu8((p), (q)))
+#define MM_ABS(p, q) \
+  _mm_or_si128(_mm_subs_epu8((q), (p)), _mm_subs_epu8((p), (q)))

 // Shift each byte of "x" by 3 bits while preserving by the sign bit.
 static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
@@ -262,22 +261,24 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
  *x = _mm_packs_epi16(lo_1, hi_1);
 }

-#define FLIP_SIGN_BIT2(a, b) do {                                              \
-  (a) = _mm_xor_si128(a, sign_bit);                                            \
-  (b) = _mm_xor_si128(b, sign_bit);                                            \
-} while (0)
+#define FLIP_SIGN_BIT2(a, b)          \
+  do {                                \
+    (a) = _mm_xor_si128(a, sign_bit); \
+    (b) = _mm_xor_si128(b, sign_bit); \
+  } while (0)

-#define FLIP_SIGN_BIT4(a, b, c, d) do {                                        \
-  FLIP_SIGN_BIT2(a, b);                                                        \
-  FLIP_SIGN_BIT2(c, d);                                                        \
-} while (0)
+#define FLIP_SIGN_BIT4(a, b, c, d) \
+  do {                             \
+    FLIP_SIGN_BIT2(a, b);          \
+    FLIP_SIGN_BIT2(c, d);          \
+  } while (0)

 // input/output is uint8_t
 static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
                                       const __m128i* const p0,
                                       const __m128i* const q0,
-                                       const __m128i* const q1,
-                                       int hev_thresh, __m128i* const not_hev) {
+                                       const __m128i* const q1, int hev_thresh,
+                                       __m128i* const not_hev) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i t_1 = MM_ABS(*p1, *p0);
  const __m128i t_2 = MM_ABS(*q1, *q0);
@@ -313,10 +314,10 @@ static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
  __m128i v3 = _mm_adds_epi8(*fl, k3);
  __m128i v4 = _mm_adds_epi8(*fl, k4);

-  SignedShift8b_SSE2(&v4);             // v4 >> 3
-  SignedShift8b_SSE2(&v3);             // v3 >> 3
-  *q0 = _mm_subs_epi8(*q0, v4);        // q0 -= v4
-  *p0 = _mm_adds_epi8(*p0, v3);        // p0 += v3
+  SignedShift8b_SSE2(&v4);       // v4 >> 3
+  SignedShift8b_SSE2(&v3);       // v3 >> 3
+  *q0 = _mm_subs_epi8(*q0, v4);  // q0 -= v4
+  *p0 = _mm_adds_epi8(*p0, v3);  // p0 += v3
 }

 // Updates values of 2 pixels at MB edge during complex filtering.
@@ -339,17 +340,17 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
 static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
                                         const __m128i* const p0,
                                         const __m128i* const q0,
-                                         const __m128i* const q1,
-                                         int thresh, __m128i* const mask) {
+                                         const __m128i* const q1, int thresh,
+                                         __m128i* const mask) {
  const __m128i m_thresh = _mm_set1_epi8((char)thresh);
-  const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
+  const __m128i t1 = MM_ABS(*p1, *q1);  // abs(p1 - q1)
  const __m128i kFE = _mm_set1_epi8((char)0xFE);
  const __m128i t2 = _mm_and_si128(t1, kFE);  // set lsb of each byte to zero
  const __m128i t3 = _mm_srli_epi16(t2, 1);   // abs(p1 - q1) / 2

-  const __m128i t4 = MM_ABS(*p0, *q0);        // abs(p0 - q0)
-  const __m128i t5 = _mm_adds_epu8(t4, t4);   // abs(p0 - q0) * 2
-  const __m128i t6 = _mm_adds_epu8(t5, t3);   // abs(p0-q0)*2 + abs(p1-q1)/2
+  const __m128i t4 = MM_ABS(*p0, *q0);       // abs(p0 - q0)
+  const __m128i t5 = _mm_adds_epu8(t4, t4);  // abs(p0 - q0) * 2
+  const __m128i t6 = _mm_adds_epu8(t5, t3);  // abs(p0-q0)*2 + abs(p1-q1)/2

  const __m128i t7 = _mm_subs_epu8(t6, m_thresh);  // mask <= m_thresh
  *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());
@@ -372,7 +373,7 @@ static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,

  FLIP_SIGN_BIT2(*p0, *q0);
  GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
-  a = _mm_and_si128(a, mask);     // mask filter values we don't care about
+  a = _mm_and_si128(a, mask);  // mask filter values we don't care about
  DoSimpleFilter_SSE2(p0, q0, &a);
  FLIP_SIGN_BIT2(*p0, *q0);
 }
@@ -404,12 +405,12 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
  t1 = _mm_and_si128(t1, *mask);       // mask filter values we don't care about

-  t2 = _mm_adds_epi8(t1, k3);        // 3 * (q0 - p0) + hev(p1 - q1) + 3
-  t3 = _mm_adds_epi8(t1, k4);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
-  SignedShift8b_SSE2(&t2);           // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
-  SignedShift8b_SSE2(&t3);           // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
-  *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
-  *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
+  t2 = _mm_adds_epi8(t1, k3);    // 3 * (q0 - p0) + hev(p1 - q1) + 3
+  t3 = _mm_adds_epi8(t1, k4);    // 3 * (q0 - p0) + hev(p1 - q1) + 4
+  SignedShift8b_SSE2(&t2);       // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  SignedShift8b_SSE2(&t3);       // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+  *p0 = _mm_adds_epi8(*p0, t2);  // p0 += t2
+  *q0 = _mm_subs_epi8(*q0, t3);  // q0 -= t3
  FLIP_SIGN_BIT2(*p0, *q0);

  // this is equivalent to signed (a + 1) >> 1 calculation
@@ -417,9 +418,9 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
  t3 = _mm_avg_epu8(t2, zero);
  t3 = _mm_sub_epi8(t3, k64);

-  t3 = _mm_and_si128(not_hev, t3);   // if !hev
-  *q1 = _mm_subs_epi8(*q1, t3);      // q1 -= t3
-  *p1 = _mm_adds_epi8(*p1, t3);      // p1 += t3
+  t3 = _mm_and_si128(not_hev, t3);  // if !hev
+  *q1 = _mm_subs_epi8(*q1, t3);     // q1 -= t3
+  *p1 = _mm_adds_epi8(*p1, t3);     // p1 += t3
  FLIP_SIGN_BIT2(*p1, *q1);
 }

@@ -440,13 +441,13 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
  FLIP_SIGN_BIT2(*p2, *q2);
  GetBaseDelta_SSE2(p1, p0, q0, q1, &a);

-  { // do simple filter on pixels with hev
+  {  // do simple filter on pixels with hev
    const __m128i m = _mm_andnot_si128(not_hev, *mask);
    const __m128i f = _mm_and_si128(a, m);
    DoSimpleFilter_SSE2(p0, q0, &f);
  }

-  { // do strong filter on pixels with not hev
+  {  // do strong filter on pixels with not hev
    const __m128i k9 = _mm_set1_epi16(0x0900);
    const __m128i k63 = _mm_set1_epi16(63);

@@ -456,11 +457,11 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
    const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
    const __m128i f_hi = _mm_unpackhi_epi8(zero, f);

-    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9);    // Filter (lo) * 9
-    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9);    // Filter (hi) * 9
+    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9);  // Filter (lo) * 9
+    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9);  // Filter (hi) * 9

-    const __m128i a2_lo = _mm_add_epi16(f9_lo, k63);    // Filter * 9 + 63
-    const __m128i a2_hi = _mm_add_epi16(f9_hi, k63);    // Filter * 9 + 63
+    const __m128i a2_lo = _mm_add_epi16(f9_lo, k63);  // Filter * 9 + 63
+    const __m128i a2_hi = _mm_add_epi16(f9_hi, k63);  // Filter * 9 + 63

    const __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo);  // Filter * 18 + 63
    const __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi);  // Filter * 18 + 63
@@ -503,8 +504,7 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
 }

 static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
-                                      const uint8_t* const r8,
-                                      int stride,
+                                      const uint8_t* const r8, int stride,
                                      __m128i* const p1, __m128i* const p0,
                                      __m128i* const q0, __m128i* const q1) {
  // Assume the pixels around the edge (|) are numbered as follows
@@ -539,8 +539,8 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
  }
 }

-static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
-                                      uint8_t* dst, int stride) {
+static WEBP_INLINE void Store4x4_SSE2(__m128i* const x, uint8_t* dst,
+                                      int stride) {
  int i;
  for (i = 0; i < 4; ++i, dst += stride) {
    WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
@@ -552,9 +552,8 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
 static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
                                       const __m128i* const p0,
                                       const __m128i* const q0,
-                                       const __m128i* const q1,
-                                       uint8_t* r0, uint8_t* r8,
-                                       int stride) {
+                                       const __m128i* const q1, uint8_t* r0,
+                                       uint8_t* r8, int stride) {
  __m128i t1, p1_s, p0_s, q0_s, q1_s;

  // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -636,50 +635,55 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)

-#define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
-  (m) = MM_ABS(p1, p0);                                                        \
-  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
-} while (0)
+#define MAX_DIFF1(p3, p2, p1, p0, m)       \
+  do {                                     \
+    (m) = MM_ABS(p1, p0);                  \
+    (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
+    (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
+  } while (0)

-#define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
-  (m) = _mm_max_epu8(m, MM_ABS(p1, p0));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
-} while (0)
+#define MAX_DIFF2(p3, p2, p1, p0, m)       \
+  do {                                     \
+    (m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
+    (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
+    (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
+  } while (0)

-#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) do {                          \
-  (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]);                        \
-  (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]);                        \
-  (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]);                        \
-  (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]);                        \
-} while (0)
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4)          \
+  do {                                                    \
+    (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
+    (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
+    (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
+    (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
+  } while (0)

-#define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
-  const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
-  const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]);                 \
-  (p) = _mm_unpacklo_epi64(U, V);                                              \
-} while (0)
+#define LOADUV_H_EDGE(p, u, v, stride)                           \
+  do {                                                           \
+    const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
+    const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
+    (p) = _mm_unpacklo_epi64(U, V);                              \
+  } while (0)

-#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) do {                     \
-  LOADUV_H_EDGE(e1, u, v, 0 * (stride));                                       \
-  LOADUV_H_EDGE(e2, u, v, 1 * (stride));                                       \
-  LOADUV_H_EDGE(e3, u, v, 2 * (stride));                                       \
-  LOADUV_H_EDGE(e4, u, v, 3 * (stride));                                       \
-} while (0)
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) \
+  do {                                                \
+    LOADUV_H_EDGE(e1, u, v, 0 * (stride));            \
+    LOADUV_H_EDGE(e2, u, v, 1 * (stride));            \
+    LOADUV_H_EDGE(e3, u, v, 2 * (stride));            \
+    LOADUV_H_EDGE(e4, u, v, 3 * (stride));            \
+  } while (0)

-#define STOREUV(p, u, v, stride) do {                                          \
-  _mm_storel_epi64((__m128i*)&(u)[(stride)], p);                               \
-  (p) = _mm_srli_si128(p, 8);                                                  \
-  _mm_storel_epi64((__m128i*)&(v)[(stride)], p);                               \
-} while (0)
+#define STOREUV(p, u, v, stride)                   \
+  do {                                             \
+    _mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
+    (p) = _mm_srli_si128(p, 8);                    \
+    _mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
+  } while (0)

 static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
                                         const __m128i* const p0,
                                         const __m128i* const q0,
-                                         const __m128i* const q1,
-                                         int thresh, int ithresh,
-                                         __m128i* const mask) {
+                                         const __m128i* const q1, int thresh,
+                                         int ithresh, __m128i* const mask) {
  const __m128i it = _mm_set1_epi8(ithresh);
  const __m128i diff = _mm_subs_epu8(*mask, it);
  const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
@@ -689,8 +693,8 @@ static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
 }

 // on macroblock edges
-static void VFilter16_SSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void VFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  __m128i t1;
  __m128i mask;
  __m128i p2, p1, p0, q0, q1, q2;
@@ -715,8 +719,8 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
  _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
 }

-static void HFilter16_SSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void HFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  __m128i mask;
  __m128i p3, p2, p1, p0, q0, q1, q2, q3;

@@ -735,19 +739,19 @@ static void HFilter16_SSE2(uint8_t* p, int stride,
 }

 // on three inner edges
-static void VFilter16i_SSE2(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
+                            int hev_thresh) {
  int k;
-  __m128i p3, p2, p1, p0;   // loop invariants
+  __m128i p3, p2, p1, p0;  // loop invariants

  LOAD_H_EDGES4(p, stride, p3, p2, p1, p0);  // prologue

  for (k = 3; k > 0; --k) {
    __m128i mask, tmp1, tmp2;
-    uint8_t* const b = p + 2 * stride;   // beginning of p1
+    uint8_t* const b = p + 2 * stride;  // beginning of p1
    p += 4 * stride;

-    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    MAX_DIFF1(p3, p2, p1, p0, mask);  // compute partial mask
    LOAD_H_EDGES4(p, stride, p3, p2, tmp1, tmp2);
    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);

@@ -768,20 +772,20 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
  }
 }

-static void HFilter16i_SSE2(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
+                            int hev_thresh) {
  int k;
-  __m128i p3, p2, p1, p0;   // loop invariants
+  __m128i p3, p2, p1, p0;  // loop invariants

  Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue

  for (k = 3; k > 0; --k) {
    __m128i mask, tmp1, tmp2;
-    uint8_t* const b = p + 2;   // beginning of p1
+    uint8_t* const b = p + 2;  // beginning of p1

    p += 4;  // beginning of q0 (and next span)

-    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    MAX_DIFF1(p3, p2, p1, p0, mask);  // compute partial mask
    Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);

@@ -843,8 +847,8 @@ static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }

 static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;

@@ -870,11 +874,11 @@ static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 }

 static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-                           int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+                           int stride, int thresh, int ithresh,
+                           int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;
-  Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
+  Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0);  // p3, p2, p1, p0
  MAX_DIFF1(t2, t1, p1, p0, mask);

  u += 4;  // beginning of q0
@@ -904,7 +908,7 @@ static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
 //   where: AC = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
 //   and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1

-static void VE4_SSE2(uint8_t* dst) {    // vertical
+static void VE4_SSE2(uint8_t* dst) {  // vertical
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -920,7 +924,7 @@ static void VE4_SSE2(uint8_t* dst) {    // vertical
  }
 }

-static void LD4_SSE2(uint8_t* dst) {   // Down-Left
+static void LD4_SSE2(uint8_t* dst) {  // Down-Left
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -930,13 +934,13 @@ static void LD4_SSE2(uint8_t* dst) {   // Down-Left
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
 }

-static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
+static void VR4_SSE2(uint8_t* dst) {  // Vertical-Right
  const __m128i one = _mm_set1_epi8(1);
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
@@ -951,8 +955,8 @@ static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));

@@ -961,7 +965,7 @@ static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
  DST(0, 3) = AVG3(K, J, I);
 }

-static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
+static void VL4_SSE2(uint8_t* dst) {  // Vertical-Left
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
  const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@@ -977,8 +981,8 @@ static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
  const uint32_t extra_out =
      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));

@@ -987,7 +991,7 @@ static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
  DST(3, 3) = (extra_out >> 8) & 0xff;
 }

-static void RD4_SSE2(uint8_t* dst) {   // Down-right
+static void RD4_SSE2(uint8_t* dst) {  // Down-right
  const __m128i one = _mm_set1_epi8(1);
  const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
  const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
@@ -1004,7 +1008,7 @@ static void RD4_SSE2(uint8_t* dst) {   // Down-right
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
-  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -1053,9 +1057,9 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
  }
 }

-static void TM4_SSE2(uint8_t* dst)   { TrueMotion_SSE2(dst, 4); }
+static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
 static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
-static void TM16_SSE2(uint8_t* dst)  { TrueMotion_SSE2(dst, 16); }
+static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }

 static void VE16_SSE2(uint8_t* dst) {
  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
@@ -1065,7 +1069,7 @@ static void VE16_SSE2(uint8_t* dst) {
  }
 }

-static void HE16_SSE2(uint8_t* dst) {     // horizontal
+static void HE16_SSE2(uint8_t* dst) {  // horizontal
  int j;
  for (j = 16; j > 0; --j) {
    const __m128i values = _mm_set1_epi8((char)dst[-1]);
@@ -1125,7 +1129,7 @@ static void DC16NoTopLeft_SSE2(uint8_t* dst) {  // DC with no top & left samples
 //------------------------------------------------------------------------------
 // Chroma

-static void VE8uv_SSE2(uint8_t* dst) {    // vertical
+static void VE8uv_SSE2(uint8_t* dst) {  // vertical
  int j;
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  for (j = 0; j < 8; ++j) {
@@ -1142,7 +1146,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
  }
 }

-static void DC8uv_SSE2(uint8_t* dst) {     // DC
+static void DC8uv_SSE2(uint8_t* dst) {  // DC
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1157,7 +1161,7 @@ static void DC8uv_SSE2(uint8_t* dst) {     // DC
  }
 }

-static void DC8uvNoLeft_SSE2(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft_SSE2(uint8_t* dst) {  // DC with no left samples
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1174,7 +1178,7 @@ static void DC8uvNoTop_SSE2(uint8_t* dst) {  // DC with no top samples
  Put8x8uv_SSE2(dc0 >> 3, dst);
 }

-static void DC8uvNoTopLeft_SSE2(uint8_t* dst) {    // DC with nothing
+static void DC8uvNoTopLeft_SSE2(uint8_t* dst) {  // DC with nothing
  Put8x8uv_SSE2(0x80, dst);
 }

--- a/src/dsp/dec_sse41.c
+++ b/src/dsp/dec_sse41.c
@@ -17,12 +17,12 @@
 #include <emmintrin.h>
 #include <smmintrin.h>

-#include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/cpu.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"

-static void HE16_SSE41(uint8_t* dst) {     // horizontal
+static void HE16_SSE41(uint8_t* dst) {  // horizontal
  int j;
  const __m128i kShuffle3 = _mm_set1_epi8(3);
  for (j = 16; j > 0; --j) {
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -25,7 +25,7 @@
 extern "C" {
 #endif

-#define BPS 32   // this is the common stride for enc/dec
+#define BPS 32  // this is the common stride for enc/dec

 //------------------------------------------------------------------------------
 // WEBP_RESTRICT
@@ -44,14 +44,13 @@ extern "C" {
 #define WEBP_RESTRICT
 #endif

-
 //------------------------------------------------------------------------------
 // Init stub generator

 // Defines an init function stub to ensure each module exposes a symbol,
 // avoiding a compiler warning.
 #define WEBP_DSP_INIT_STUB(func) \
-  extern void func(void); \
+  extern void func(void);        \
  void func(void) {}

 //------------------------------------------------------------------------------
@@ -70,7 +69,7 @@ typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
                       int16_t* WEBP_RESTRICT out);
 extern VP8Idct VP8ITransform;
 extern VP8Fdct VP8FTransform;
-extern VP8Fdct VP8FTransform2;   // performs two transforms at a time
+extern VP8Fdct VP8FTransform2;  // performs two transforms at a time
 extern VP8WHT VP8FTransformWHT;
 // Predictions
 // *dst is the destination block. *top and *left can be NULL.
@@ -95,8 +94,7 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

 // Compute the average (DC) of four 4x4 blocks.
 // Each sub-4x4 block #i sum is stored in dc[i].
-typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
-                              uint32_t dc[4]);
+typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]);
 extern VP8MeanMetric VP8Mean16x4;

 typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
@@ -104,7 +102,7 @@ typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
 extern VP8BlockCopy VP8Copy4x4;
 extern VP8BlockCopy VP8Copy16x8;
 // Quantization
-struct VP8Matrix;   // forward declaration
+struct VP8Matrix;  // forward declaration
 typedef int (*VP8QuantizeBlock)(
    int16_t in[16], int16_t out[16],
    const struct VP8Matrix* WEBP_RESTRICT const mtx);
@@ -125,15 +123,15 @@ extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
 extern const int VP8DspScan[16 + 4 + 4];

 // Collect histogram for susceptibility calculation.
-#define MAX_COEFF_THRESH   31   // size of histogram used by CollectHistogram.
+#define MAX_COEFF_THRESH 31  // size of histogram used by CollectHistogram.
 typedef struct {
  // We only need to store max_value and last_non_zero, not the distribution.
  int max_value;
  int last_non_zero;
 } VP8Histogram;
 typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
-                          const uint8_t* WEBP_RESTRICT pred,
-                          int start_block, int end_block,
+                          const uint8_t* WEBP_RESTRICT pred, int start_block,
+                          int end_block,
                          VP8Histogram* WEBP_RESTRICT const histo);
 extern VP8CHisto VP8CollectHistogram;
 // General-purpose util function to help VP8CollectHistogram().
@@ -146,7 +144,7 @@ void VP8EncDspInit(void);
 //------------------------------------------------------------------------------
 // cost functions (encoding)

-extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
+extern const uint16_t VP8EntropyCost[256];  // 8bit fixed-point log(p)
 // approximate cost per level:
 extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
 extern const uint8_t VP8EncBands[16 + 1];
@@ -180,7 +178,7 @@ typedef struct {
 double VP8SSIMFromStats(const VP8DistoStats* const stats);
 double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats);

-#define VP8_SSIM_KERNEL 3   // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
+#define VP8_SSIM_KERNEL 3  // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
 typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
                                        const uint8_t* src2, int stride2,
                                        int xo, int yo,  // center position
@@ -192,8 +190,8 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
 typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
                                 const uint8_t* src2, int stride2);

-extern VP8SSIMGetFunc VP8SSIMGet;         // unclipped / unchecked
-extern VP8SSIMGetClippedFunc VP8SSIMGetClipped;   // with clipping
+extern VP8SSIMGetFunc VP8SSIMGet;                // unclipped / unchecked
+extern VP8SSIMGetClippedFunc VP8SSIMGetClipped;  // with clipping
 #endif

 #if !defined(WEBP_DISABLE_STATS)
@@ -248,8 +246,8 @@ extern VP8SimpleFilterFunc VP8SimpleVFilter16i;  // filter 3 inner edges
 extern VP8SimpleFilterFunc VP8SimpleHFilter16i;

 // regular filter (on both macroblock edges and inner edges)
-typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
-                                  int thresh, int ithresh, int hev_t);
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, int thresh,
+                                  int ithresh, int hev_t);
 typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
                                    uint8_t* WEBP_RESTRICT v, int stride,
                                    int thresh, int ithresh, int hev_t);
@@ -260,7 +258,7 @@ extern VP8ChromaFilterFunc VP8VFilter8;
 extern VP8ChromaFilterFunc VP8HFilter8;

 // on inner edge
-extern VP8LumaFilterFunc VP8VFilter16i;   // filtering 3 inner edges altogether
+extern VP8LumaFilterFunc VP8VFilter16i;  // filtering 3 inner edges altogether
 extern VP8LumaFilterFunc VP8HFilter16i;
 extern VP8ChromaFilterFunc VP8VFilter8i;  // filtering u and v altogether
 extern VP8ChromaFilterFunc VP8HFilter8i;
@@ -280,7 +278,7 @@ void VP8DspInit(void);
 //------------------------------------------------------------------------------
 // WebP I/O

-#define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support
+#define FANCY_UPSAMPLING  // undefined to remove fancy upsampling support

 // Convert a pair of y/u/v lines together to the output rgb/a colorspace.
 // bottom_y can be NULL if only one line of output is needed (at top/bottom).
@@ -295,7 +293,7 @@ typedef void (*WebPUpsampleLinePairFunc)(
 // Fancy upsampling functions to convert YUV to RGB(A) modes
 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

-#endif    // FANCY_UPSAMPLING
+#endif  // FANCY_UPSAMPLING

 // Per-row point-sampling methods.
 typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
@@ -344,8 +342,8 @@ extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
 // the U/V one.
 extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
                                   uint8_t* WEBP_RESTRICT u,
-                                   uint8_t* WEBP_RESTRICT v,
-                                   int src_width, int do_store);
+                                   uint8_t* WEBP_RESTRICT v, int src_width,
+                                   int do_store);

 // Convert a row of accumulated (four-values) of rgba32 toward U/V
 extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
@@ -361,8 +359,8 @@ extern void (*WebPConvertBGRToY)(const uint8_t* WEBP_RESTRICT bgr,
 // used for plain-C fallback.
 extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
                                  uint8_t* WEBP_RESTRICT u,
-                                  uint8_t* WEBP_RESTRICT v,
-                                  int src_width, int do_store);
+                                  uint8_t* WEBP_RESTRICT v, int src_width,
+                                  int do_store);
 extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
                                    uint8_t* WEBP_RESTRICT u,
                                    uint8_t* WEBP_RESTRICT v, int width);
@@ -403,9 +401,8 @@ extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
 extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);

 // Main entry calls:
-extern void WebPRescalerImportRow(
-    struct WebPRescaler* WEBP_RESTRICT const wrk,
-    const uint8_t* WEBP_RESTRICT src);
+extern void WebPRescalerImportRow(struct WebPRescaler* WEBP_RESTRICT const wrk,
+                                  const uint8_t* WEBP_RESTRICT src);
 // Export one row (starting at x_out position) from rescaler.
 extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);

@@ -417,12 +414,12 @@ void WebPRescalerDspInit(void);

 // Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
 // alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
-extern void (*WebPApplyAlphaMultiply)(
-    uint8_t* rgba, int alpha_first, int w, int h, int stride);
+extern void (*WebPApplyAlphaMultiply)(uint8_t* rgba, int alpha_first, int w,
+                                      int h, int stride);

 // Same, buf specifically for RGBA4444 format
-extern void (*WebPApplyAlphaMultiply4444)(
-    uint8_t* rgba4444, int w, int h, int stride);
+extern void (*WebPApplyAlphaMultiply4444)(uint8_t* rgba4444, int w, int h,
+                                          int stride);

 // Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
 // Returns true if alpha[] plane has non-trivial values different from 0xff.
@@ -442,8 +439,7 @@ extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha,
 // Returns true if there's only trivial 0xff alpha values.
 extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb,
                               int argb_stride, int width, int height,
-                               uint8_t* WEBP_RESTRICT alpha,
-                               int alpha_stride);
+                               uint8_t* WEBP_RESTRICT alpha, int alpha_stride);

 // Extract the green values from 32b values in argb[] and pack them into alpha[]
 // (this is the opposite of WebPDispatchAlphaToGreen).
@@ -462,8 +458,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,

 // Same for a row of single values, with side alpha values.
 extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-                           const uint8_t* WEBP_RESTRICT const alpha,
-                           int width, int inverse);
+                           const uint8_t* WEBP_RESTRICT const alpha, int width,
+                           int inverse);

 // Same a WebPMultRow(), but for several 'num_rows' rows.
 void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
@@ -472,8 +468,8 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,

 // Plain-C versions, used as fallback by some implementations.
 void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-                   const uint8_t* WEBP_RESTRICT const alpha,
-                   int width, int inverse);
+                   const uint8_t* WEBP_RESTRICT const alpha, int width,
+                   int inverse);
 void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);

 #ifdef WORDS_BIGENDIAN
@@ -481,15 +477,15 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
 extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT r,
                            const uint8_t* WEBP_RESTRICT g,
-                            const uint8_t* WEBP_RESTRICT b,
-                            int len, uint32_t* WEBP_RESTRICT out);
+                            const uint8_t* WEBP_RESTRICT b, int len,
+                            uint32_t* WEBP_RESTRICT out);
 #endif

 // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
 extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
                           const uint8_t* WEBP_RESTRICT g,
-                           const uint8_t* WEBP_RESTRICT b,
-                           int len, int step, uint32_t* WEBP_RESTRICT out);
+                           const uint8_t* WEBP_RESTRICT b, int len, int step,
+                           uint32_t* WEBP_RESTRICT out);

 // This function returns true if src[i] contains a value different from 0xff.
 extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
@@ -504,18 +500,18 @@ void WebPInitAlphaProcessing(void);
 //------------------------------------------------------------------------------
 // Filter functions

-typedef enum {     // Filter types.
+typedef enum {  // Filter types.
  WEBP_FILTER_NONE = 0,
  WEBP_FILTER_HORIZONTAL,
  WEBP_FILTER_VERTICAL,
  WEBP_FILTER_GRADIENT,
  WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
-  WEBP_FILTER_BEST,    // meta-types
+  WEBP_FILTER_BEST,                             // meta-types
  WEBP_FILTER_FAST
 } WEBP_FILTER_TYPE;

-typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
-                               int width, int height, int stride,
+typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in, int width,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT out);
 // In-place un-filtering.
 // Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
@@ -538,7 +534,7 @@ extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
 void VP8FiltersInit(void);

 #ifdef __cplusplus
-}    // extern "C"
+}  // extern "C"
 #endif

 #endif  // WEBP_DSP_DSP_H_
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -26,9 +26,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 }

 #if !WEBP_NEON_OMIT_C_CODE
-static WEBP_INLINE int clip_max(int v, int max) {
-  return (v > max) ? max : v;
-}
+static WEBP_INLINE int clip_max(int v, int max) { return (v > max) ? max : v; }
 #endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
@@ -36,14 +34,14 @@ static WEBP_INLINE int clip_max(int v, int max) {
 // the higher, the "easier" the macroblock is to compress.

 const int VP8DspScan[16 + 4 + 4] = {
-  // Luma
-  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
-  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
-  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
-  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+    // Luma
+    0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS,
+    0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
+    0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS,  12 + 8 * BPS,
+    0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

-  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
-  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+    0 + 0 * BPS,  4 + 0 * BPS,  0 + 4 * BPS,  4 + 4 * BPS,  // U
+    8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS,  12 + 4 * BPS  // V
 };

 // general-purpose util function
@@ -68,7 +66,7 @@ static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
                               int start_block, int end_block,
                               VP8Histogram* WEBP_RESTRICT const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int k;
    int16_t out[16];
@@ -89,7 +87,7 @@ static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
 //------------------------------------------------------------------------------
 // run-time tables (~4k)

-static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
+static uint8_t clip1[255 + 510 + 1];  // clips [-255,510] to [0,255]

 // We declare this variable 'volatile' to prevent instruction reordering
 // and make sure it's set to true _last_ (so as to be thread-safe)
@@ -105,7 +103,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
  }
 }

-
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)

@@ -120,7 +117,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  int C[4 * 4], *tmp;
  int i;
  tmp = C;
-  for (i = 0; i < 4; ++i) {    // vertical pass
+  for (i = 0; i < 4; ++i) {  // vertical pass
    const int a = in[0] + in[8];
    const int b = in[0] - in[8];
    const int c =
@@ -136,7 +133,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  }

  tmp = C;
-  for (i = 0; i < 4; ++i) {    // horizontal pass
+  for (i = 0; i < 4; ++i) {  // horizontal pass
    const int dc = tmp[0] + 4;
    const int a = dc + tmp[8];
    const int b = dc - tmp[8];
@@ -154,8 +151,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,

 static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
                         const int16_t* WEBP_RESTRICT in,
-                         uint8_t* WEBP_RESTRICT dst,
-                         int do_two) {
+                         uint8_t* WEBP_RESTRICT dst, int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
@@ -168,28 +164,28 @@ static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
  int i;
  int tmp[16];
  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
-    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
+    const int d0 = src[0] - ref[0];  // 9bit dynamic range ([-255,255])
    const int d1 = src[1] - ref[1];
    const int d2 = src[2] - ref[2];
    const int d3 = src[3] - ref[3];
-    const int a0 = (d0 + d3);         // 10b                      [-510,510]
+    const int a0 = (d0 + d3);  // 10b [-510,510]
    const int a1 = (d1 + d2);
    const int a2 = (d1 - d2);
    const int a3 = (d0 - d3);
-    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
-    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
+    tmp[0 + i * 4] = (a0 + a1) * 8;                        // 14b [-8160,8160]
+    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;  // [-7536,7542]
    tmp[2 + i * 4] = (a0 - a1) * 8;
-    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
+    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
  }
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
-    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
-    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
+    const int a1 = (tmp[4 + i] + tmp[8 + i]);
+    const int a2 = (tmp[4 + i] - tmp[8 + i]);
    const int a3 = (tmp[0 + i] - tmp[12 + i]);
-    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
+    out[0 + i] = (a0 + a1 + 7) >> 4;  // 12b
    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
    out[8 + i] = (a0 - a1 + 7) >> 4;
-    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
+    out[12 + i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
  }
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
@@ -212,23 +208,23 @@ static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
-    tmp[0 + i * 4] = a0 + a1;   // 14b
+    tmp[0 + i * 4] = a0 + a1;  // 14b
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
-    const int a1 = (tmp[4 + i] + tmp[12+ i]);
-    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a1 = (tmp[4 + i] + tmp[12 + i]);
+    const int a2 = (tmp[4 + i] - tmp[12 + i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
-    const int b0 = a0 + a1;    // 16b
+    const int b0 = a0 + a1;  // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
-    out[ 0 + i] = b0 >> 1;     // 15b
-    out[ 4 + i] = b1 >> 1;
-    out[ 8 + i] = b2 >> 1;
+    out[0 + i] = b0 >> 1;  // 15b
+    out[4 + i] = b1 >> 1;
+    out[8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
 }
@@ -303,23 +299,23 @@ static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,

 static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
                               const uint8_t* WEBP_RESTRICT left,
-                               const uint8_t* WEBP_RESTRICT top,
-                               int size, int round, int shift) {
+                               const uint8_t* WEBP_RESTRICT top, int size,
+                               int round, int shift) {
  int DC = 0;
  int j;
  if (top != NULL) {
    for (j = 0; j < size; ++j) DC += top[j];
-    if (left != NULL) {   // top and left present
+    if (left != NULL) {  // top and left present
      for (j = 0; j < size; ++j) DC += left[j];
-    } else {      // top, but no left
+    } else {  // top, but no left
      DC += DC;
    }
    DC = (DC + round) >> shift;
-  } else if (left != NULL) {   // left but no top
+  } else if (left != NULL) {  // left but no top
    for (j = 0; j < size; ++j) DC += left[j];
    DC += DC;
    DC = (DC + round) >> shift;
-  } else {   // no top, no left, nothing.
+  } else {  // no top, no left, nothing.
    DC = 0x80;
  }
  Fill(dst, DC, size);
@@ -372,10 +368,10 @@ static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
 // vertical
 static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const uint8_t vals[4] = {
-    AVG3(top[-1], top[0], top[1]),
-    AVG3(top[ 0], top[1], top[2]),
-    AVG3(top[ 1], top[2], top[3]),
-    AVG3(top[ 2], top[3], top[4])
+      AVG3(top[-1], top[0], top[1]),
+      AVG3(top[0], top[1], top[2]),
+      AVG3(top[1], top[2], top[3]),
+      AVG3(top[2], top[3], top[4]),
  };
  int i;
  for (i = 0; i < 4; ++i) {
@@ -413,13 +409,13 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const int B = top[1];
  const int C = top[2];
  const int D = top[3];
-  DST(0, 3)                                     = AVG3(J, K, L);
-  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
-  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
+  DST(0, 3) = AVG3(J, K, L);
+  DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
+  DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
-  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
-  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
-  DST(3, 0)                                     = AVG3(D, C, B);
+  DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
+  DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
+  DST(3, 0) = AVG3(D, C, B);
 }

 static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -431,13 +427,13 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const int F = top[5];
  const int G = top[6];
  const int H = top[7];
-  DST(0, 0)                                     = AVG3(A, B, C);
-  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
-  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(0, 0) = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
-  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
-  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
-  DST(3, 3)                                     = AVG3(G, H, H);
+  DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+  DST(3, 3) = AVG3(G, H, H);
 }

 static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -452,14 +448,14 @@ static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  DST(0, 0) = DST(1, 2) = AVG2(X, A);
  DST(1, 0) = DST(2, 2) = AVG2(A, B);
  DST(2, 0) = DST(3, 2) = AVG2(B, C);
-  DST(3, 0)             = AVG2(C, D);
+  DST(3, 0) = AVG2(C, D);

-  DST(0, 3) =             AVG3(K, J, I);
-  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 3) = AVG3(K, J, I);
+  DST(0, 2) = AVG3(J, I, X);
  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
-  DST(3, 1) =             AVG3(B, C, D);
+  DST(3, 1) = AVG3(B, C, D);
 }

 static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -471,17 +467,17 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const int F = top[5];
  const int G = top[6];
  const int H = top[7];
-  DST(0, 0) =             AVG2(A, B);
+  DST(0, 0) = AVG2(A, B);
  DST(1, 0) = DST(0, 2) = AVG2(B, C);
  DST(2, 0) = DST(1, 2) = AVG2(C, D);
  DST(3, 0) = DST(2, 2) = AVG2(D, E);

-  DST(0, 1) =             AVG3(A, B, C);
+  DST(0, 1) = AVG3(A, B, C);
  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
-              DST(3, 2) = AVG3(E, F, G);
-              DST(3, 3) = AVG3(F, G, H);
+  DST(3, 2) = AVG3(E, F, G);
+  DST(3, 3) = AVG3(F, G, H);
 }

 static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -489,14 +485,13 @@ static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  const int J = top[-3];
  const int K = top[-4];
  const int L = top[-5];
-  DST(0, 0) =             AVG2(I, J);
+  DST(0, 0) = AVG2(I, J);
  DST(2, 0) = DST(0, 1) = AVG2(J, K);
  DST(2, 1) = DST(0, 2) = AVG2(K, L);
-  DST(1, 0) =             AVG3(I, J, K);
+  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
-  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }

 static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -512,14 +507,14 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  DST(0, 0) = DST(2, 1) = AVG2(I, X);
  DST(0, 1) = DST(2, 2) = AVG2(J, I);
  DST(0, 2) = DST(2, 3) = AVG2(K, J);
-  DST(0, 3)             = AVG2(L, K);
+  DST(0, 3) = AVG2(L, K);

-  DST(3, 0)             = AVG3(A, B, C);
-  DST(2, 0)             = AVG3(X, A, B);
+  DST(3, 0) = AVG3(A, B, C);
+  DST(2, 0) = AVG3(X, A, B);
  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
-  DST(1, 3)             = AVG3(L, K, J);
+  DST(1, 3) = AVG3(L, K, J);
 }

 static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -561,8 +556,7 @@ static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,

 #if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
-                              const uint8_t* WEBP_RESTRICT b,
-                              int w, int h) {
+                              const uint8_t* WEBP_RESTRICT b, int w, int h) {
  int count = 0;
  int y, x;
  for (y = 0; y < h; ++y) {
@@ -604,7 +598,7 @@ static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
      }
    }
    dc[k] = avg;
-    ref += 4;   // go to next 4x4 block.
+    ref += 4;  // go to next 4x4 block.
  }
 }

@@ -637,17 +631,17 @@ static int TTransform(const uint8_t* WEBP_RESTRICT in,
  // vertical pass
  for (i = 0; i < 4; ++i, ++w) {
    const int a0 = tmp[0 + i] + tmp[8 + i];
-    const int a1 = tmp[4 + i] + tmp[12+ i];
-    const int a2 = tmp[4 + i] - tmp[12+ i];
+    const int a1 = tmp[4 + i] + tmp[12 + i];
+    const int a2 = tmp[4 + i] - tmp[12 + i];
    const int a3 = tmp[0 + i] - tmp[8 + i];
    const int b0 = a0 + a1;
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;

-    sum += w[ 0] * abs(b0);
-    sum += w[ 4] * abs(b1);
-    sum += w[ 8] * abs(b2);
+    sum += w[0] * abs(b0);
+    sum += w[4] * abs(b1);
+    sum += w[8] * abs(b2);
    sum += w[12] * abs(b3);
  }
  return sum;
@@ -680,9 +674,8 @@ static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
 //

 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static const uint8_t kZigzag[16] = {
-  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
-};
+static const uint8_t kZigzag[16] = {0, 1,  4,  8,  5, 2,  3,  6,
+                                    9, 12, 13, 10, 7, 11, 14, 15};

 // Simple quantization
 static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
@@ -714,7 +707,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
                             const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
--- a/src/dsp/enc_mips32.c
+++ b/src/dsp/enc_mips32.c
@@ -18,8 +18,8 @@
 #if defined(WEBP_USE_MIPS32)

 #include "src/dsp/mips_macro.h"
-#include "src/enc/vp8i_enc.h"
 #include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"

 static const int kC1 = WEBP_TRANSFORM_AC3_C1;
 static const int kC2 = WEBP_TRANSFORM_AC3_C2;
@@ -30,6 +30,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 // A..D - offsets in bytes to load from in buffer
 // TEMP0..TEMP3 - registers for corresponding tmp elements
 // TEMP4..TEMP5 - temporary registers
+// clang-format off
 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
  "lh      %[temp16],      " #A "(%[temp20])                 \n\t"          \
  "lh      %[temp18],      " #B "(%[temp20])                 \n\t"          \
@@ -107,6 +108,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
  "sb      %[" #TEMP4 "],    1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
  "sb      %[" #TEMP8 "],    2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
  "sb      %[" #TEMP12 "],   3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
+// clang-format on

 // Does one or two inverse transforms.
 static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -118,27 +120,26 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
  const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};

  __asm__ volatile(
-    "lw      %[temp20],      4(%[args])                      \n\t"
-    VERTICAL_PASS(0, 16,  8, 24, temp4,  temp0,  temp1,  temp2,  temp3)
-    VERTICAL_PASS(2, 18, 10, 26, temp8,  temp4,  temp5,  temp6,  temp7)
-    VERTICAL_PASS(4, 20, 12, 28, temp12, temp8,  temp9,  temp10, temp11)
-    VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
+      "lw      %[temp20],      4(%[args])                      \n\t"        //
+      VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3)        //
+      VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7)       //
+      VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)    //
+      VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)  //

-    HORIZONTAL_PASS(0, temp0, temp4, temp8,  temp12)
-    HORIZONTAL_PASS(1, temp1, temp5, temp9,  temp13)
-    HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
-    HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
+      HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)   //
+      HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)   //
+      HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)  //
+      HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)  //

-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
-      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
-      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
-      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
-      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
-    : [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
-    : "memory", "hi", "lo"
-  );
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
+        [temp9] "=&r"(temp9), [temp10] "=&r"(temp10), [temp11] "=&r"(temp11),
+        [temp12] "=&r"(temp12), [temp13] "=&r"(temp13), [temp14] "=&r"(temp14),
+        [temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17),
+        [temp18] "=&r"(temp18), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)
+      : [args] "r"(args), [kC1] "r"(kC1), [kC2] "r"(kC2)
+      : "memory", "hi", "lo");
 }

 static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -158,6 +159,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
 // J - offset in bytes (kZigzag[n] * 2)
 // K - offset in bytes (kZigzag[n] * 4)
 // N - offset in bytes (n * 2)
+// clang-format off
 #define QUANTIZE_ONE(J, K, N)                                               \
  "lh           %[temp0],       " #J "(%[ppin])                     \n\t"   \
  "lhu          %[temp1],       " #J "(%[ppsharpen])                \n\t"   \
@@ -184,6 +186,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
 "2:                                                                 \n\t"   \
  "sh           %[temp5],       " #J "(%[ppin])                     \n\t"   \
  "sh           %[level],       " #N "(%[pout])                     \n\t"
+// clang-format on

 static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
                                const VP8Matrix* const mtx) {
@@ -191,43 +194,39 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
  int sign, coeff, level, i;
  int max_level = MAX_LEVEL;

-  int16_t* ppin             = &in[0];
-  int16_t* pout             = &out[0];
+  int16_t* ppin = &in[0];
+  int16_t* pout = &out[0];
  const uint16_t* ppsharpen = &mtx->sharpen[0];
  const uint32_t* ppzthresh = &mtx->zthresh[0];
-  const uint16_t* ppq       = &mtx->q[0];
-  const uint16_t* ppiq      = &mtx->iq[0];
-  const uint32_t* ppbias    = &mtx->bias[0];
+  const uint16_t* ppq = &mtx->q[0];
+  const uint16_t* ppiq = &mtx->iq[0];
+  const uint32_t* ppbias = &mtx->bias[0];

  __asm__ volatile(
-    QUANTIZE_ONE( 0,  0,  0)
-    QUANTIZE_ONE( 2,  4,  2)
-    QUANTIZE_ONE( 8, 16,  4)
-    QUANTIZE_ONE(16, 32,  6)
-    QUANTIZE_ONE(10, 20,  8)
-    QUANTIZE_ONE( 4,  8, 10)
-    QUANTIZE_ONE( 6, 12, 12)
-    QUANTIZE_ONE(12, 24, 14)
-    QUANTIZE_ONE(18, 36, 16)
-    QUANTIZE_ONE(24, 48, 18)
-    QUANTIZE_ONE(26, 52, 20)
-    QUANTIZE_ONE(20, 40, 22)
-    QUANTIZE_ONE(14, 28, 24)
-    QUANTIZE_ONE(22, 44, 26)
-    QUANTIZE_ONE(28, 56, 28)
-    QUANTIZE_ONE(30, 60, 30)
+      QUANTIZE_ONE(0, 0, 0)     //
+      QUANTIZE_ONE(2, 4, 2)     //
+      QUANTIZE_ONE(8, 16, 4)    //
+      QUANTIZE_ONE(16, 32, 6)   //
+      QUANTIZE_ONE(10, 20, 8)   //
+      QUANTIZE_ONE(4, 8, 10)    //
+      QUANTIZE_ONE(6, 12, 12)   //
+      QUANTIZE_ONE(12, 24, 14)  //
+      QUANTIZE_ONE(18, 36, 16)  //
+      QUANTIZE_ONE(24, 48, 18)  //
+      QUANTIZE_ONE(26, 52, 20)  //
+      QUANTIZE_ONE(20, 40, 22)  //
+      QUANTIZE_ONE(14, 28, 24)  //
+      QUANTIZE_ONE(22, 44, 26)  //
+      QUANTIZE_ONE(28, 56, 28)  //
+      QUANTIZE_ONE(30, 60, 30)  //

-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
-      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
-      [level]"=&r"(level)
-    : [pout]"r"(pout), [ppin]"r"(ppin),
-      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
-      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
-      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
-    : "memory", "hi", "lo"
-  );
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level)
+      : [pout] "r"(pout), [ppin] "r"(ppin), [ppiq] "r"(ppiq),
+        [max_level] "r"(max_level), [ppbias] "r"(ppbias),
+        [ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
+      : "memory", "hi", "lo");

  // moved out from macro to increase possibility for earlier breaking
  for (i = 15; i >= 0; i--) {
@@ -239,7 +238,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
                                  const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  nz  = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
@@ -251,6 +250,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
 // A - offset in bytes to load from a and b buffers
 // E..H - offsets in bytes to store first results to tmp buffer
 // E1..H1 - offsets in bytes to store second results to tmp buffer
+// clang-format off
 #define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1)                  \
  "lbu    %[temp0],  0+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
  "lbu    %[temp1],  1+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
@@ -358,6 +358,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
  "msub   %[temp5],  %[temp8]                \n\t"                \
  "msub   %[temp6],  %[temp0]                \n\t"                \
  "msub   %[temp7],  %[temp1]                \n\t"
+// clang-format on

 static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
                           const uint8_t* WEBP_RESTRICT const b,
@@ -366,28 +367,27 @@ static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;

  __asm__ volatile(
-    HORIZONTAL_PASS(0,   0,  4,  8, 12,    64,  68,  72,  76)
-    HORIZONTAL_PASS(1,  16, 20, 24, 28,    80,  84,  88,  92)
-    HORIZONTAL_PASS(2,  32, 36, 40, 44,    96, 100, 104, 108)
-    HORIZONTAL_PASS(3,  48, 52, 56, 60,   112, 116, 120, 124)
-    "mthi   $zero                             \n\t"
-    "mtlo   $zero                             \n\t"
-    VERTICAL_PASS( 0, 16, 32, 48,     64, 80,  96, 112,   0,  8, 16, 24)
-    VERTICAL_PASS( 4, 20, 36, 52,     68, 84, 100, 116,   2, 10, 18, 26)
-    VERTICAL_PASS( 8, 24, 40, 56,     72, 88, 104, 120,   4, 12, 20, 28)
-    VERTICAL_PASS(12, 28, 44, 60,     76, 92, 108, 124,   6, 14, 22, 30)
-    "mflo   %[temp0]                          \n\t"
-    "sra    %[temp1],  %[temp0],  31          \n\t"
-    "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"
-    "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"
-    "sra    %[temp0],  %[temp0],  5           \n\t"
+      HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76)         //
+      HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92)      //
+      HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108)   //
+      HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124)  //
+      "mthi   $zero                             \n\t"
+      "mtlo   $zero                             \n\t"                 //
+      VERTICAL_PASS(0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24)     //
+      VERTICAL_PASS(4, 20, 36, 52, 68, 84, 100, 116, 2, 10, 18, 26)   //
+      VERTICAL_PASS(8, 24, 40, 56, 72, 88, 104, 120, 4, 12, 20, 28)   //
+      VERTICAL_PASS(12, 28, 44, 60, 76, 92, 108, 124, 6, 14, 22, 30)  //
+      "mflo   %[temp0]                          \n\t"
+      "sra    %[temp1],  %[temp0],  31          \n\t"
+      "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"
+      "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"
+      "sra    %[temp0],  %[temp0],  5           \n\t"

-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
-    : [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
-    : "memory", "hi", "lo"
-  );
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
+      : [a] "r"(a), [b] "r"(b), [w] "r"(w), [tmp] "r"(tmp)
+      : "memory", "hi", "lo");

  return temp0;
 }
@@ -412,6 +412,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
 // temp0..temp15 holds tmp[0]..tmp[15]
 // A - offset in bytes to load from src and ref buffers
 // TEMP0..TEMP3 - registers for corresponding tmp elements
+// clang-format off
 #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                  \
  "lw     %[" #TEMP1 "],  0(%[args])                           \n\t"    \
  "lw     %[" #TEMP2 "],  4(%[args])                           \n\t"    \
@@ -477,6 +478,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
  "sh     %[" #TEMP4 "],  " #C "(%[temp20])              \n\t"    \
  "sh     %[" #TEMP8 "],  " #D "(%[temp20])              \n\t"    \
  "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"
+// clang-format on

 static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
                              const uint8_t* WEBP_RESTRICT ref,
@@ -486,8 +488,8 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
  int temp17, temp18, temp19, temp20;
  const int c2217 = 2217;
  const int c5352 = 5352;
-  const int* const args[3] =
-      { (const int*)src, (const int*)ref, (const int*)out };
+  const int* const args[3] = {(const int*)src, (const int*)ref,
+                              (const int*)out};

  __asm__ volatile(
    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)
@@ -517,6 +519,7 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,

 #if !defined(WORK_AROUND_GCC)

+// clang-format off
 #define GET_SSE_INNER(A, B, C, D)                               \
  "lbu     %[temp0],    " #A "(%[a])                 \n\t"      \
  "lbu     %[temp1],    " #A "(%[b])                 \n\t"      \
@@ -534,11 +537,12 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
  "madd    %[temp2],    %[temp2]                     \n\t"      \
  "madd    %[temp4],    %[temp4]                     \n\t"      \
  "madd    %[temp6],    %[temp6]                     \n\t"
+// clang-format on

-#define GET_SSE(A, B, C, D)               \
-  GET_SSE_INNER(A, A + 1, A + 2, A + 3)   \
-  GET_SSE_INNER(B, B + 1, B + 2, B + 3)   \
-  GET_SSE_INNER(C, C + 1, C + 2, C + 3)   \
+#define GET_SSE(A, B, C, D)             \
+  GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
+  GET_SSE_INNER(B, B + 1, B + 2, B + 3) \
+  GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
  GET_SSE_INNER(D, D + 1, D + 2, D + 3)

 static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
@@ -547,32 +551,31 @@ static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

  __asm__ volatile(
-     "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"

-     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
-     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
-     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
-     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
-     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
-     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
-     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
-     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
-     GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
-     GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
-     GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
-     GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
-     GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
-     GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
-     GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
-     GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)      //
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)      //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)      //
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)      //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)      //
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)      //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)      //
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)      //
+      GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)      //
+      GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)      //
+      GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)  //
+      GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)  //
+      GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)  //
+      GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)  //
+      GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)  //
+      GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)  //

-    "mflo    %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+      "mflo    %[count]                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -582,24 +585,23 @@ static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

  __asm__ volatile(
-     "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"

-     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
-     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
-     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
-     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
-     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
-     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
-     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
-     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)  //
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)  //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)  //
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)  //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)  //
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)  //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)  //
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)  //

-    "mflo    %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+      "mflo    %[count]                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -609,20 +611,19 @@ static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

  __asm__ volatile(
-     "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"

-     GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
-     GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
-     GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
-     GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)  //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)  //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)  //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)  //

-    "mflo    %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+      "mflo    %[count]                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -632,17 +633,16 @@ static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

  __asm__ volatile(
-     "mult   $zero,    $zero                            \n\t"
+      "mult   $zero,    $zero                            \n\t"

-     GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+      GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)  //

-    "mflo    %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+      "mflo    %[count]                                  \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

--- a/src/dsp/enc_mips_dsp_r2.c
+++ b/src/dsp/enc_mips_dsp_r2.c
@@ -25,6 +25,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;

 // O - output
 // I - input (macro doesn't change it)
+// clang-format off
 #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7,                      \
                          I0, I1, I2, I3, I4, I5, I6, I7)                      \
  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]     \n\t"            \
@@ -140,6 +141,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
  "sh              %[" #TEMP4 "],   " #C "(%[temp20])               \n\t"      \
  "sh              %[" #TEMP8 "],   " #D "(%[temp20])               \n\t"      \
  "sh              %[" #TEMP12 "],  " #B "(%[temp20])               \n\t"
+// clang-format on

 static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
                                 const uint8_t* WEBP_RESTRICT ref,
@@ -149,10 +151,10 @@ static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
  int temp17, temp18, temp19, temp20;
-  const int* const args[3] =
-      { (const int*)src, (const int*)ref, (const int*)out };
+  const int* const args[3] = {(const int*)src, (const int*)ref,
+                              (const int*)out};

-  __asm__ volatile (
+  __asm__ volatile(
    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)
    HORIZONTAL_PASS(1, temp4,  temp5,  temp6,  temp7)
    HORIZONTAL_PASS(2, temp8,  temp9,  temp10, temp11)
@@ -178,7 +180,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;

-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp1],   0(%[in])                 \n\t"
    "ulw              %[temp2],   16(%[in])                \n\t"
    LOAD_IN_X2(temp5, temp6, 24, 26)
@@ -250,13 +252,14 @@ static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
  }
 }

+// clang-format off
 static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
                              const uint8_t* WEBP_RESTRICT const b,
                              const uint16_t* WEBP_RESTRICT const w) {
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;

-  __asm__ volatile (
+  __asm__ volatile(
    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, a,
                        0, 0, 0, 0,
                        0, 1, 2, 3,
@@ -317,6 +320,7 @@ static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
  );
  return abs(temp3 - temp17) >> 5;
 }
+// clang-format on

 static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
                                const uint8_t* WEBP_RESTRICT const b,
@@ -334,6 +338,7 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
 //------------------------------------------------------------------------------
 // Intra predictions

+// clang-format off
 #define FILL_PART(J, SIZE)                                            \
    "usw        %[value],  0+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
    "usw        %[value],  4+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
@@ -342,118 +347,125 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
    "usw        %[value], 12+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
  ".endif                                                  \n\t"

-#define FILL_8_OR_16(DST, VALUE, SIZE) do {                         \
-  int value = (VALUE);                                              \
-  __asm__ volatile (                                                \
-    "replv.qb   %[value],  %[value]                      \n\t"      \
-    FILL_PART( 0, SIZE)                                             \
-    FILL_PART( 1, SIZE)                                             \
-    FILL_PART( 2, SIZE)                                             \
-    FILL_PART( 3, SIZE)                                             \
-    FILL_PART( 4, SIZE)                                             \
-    FILL_PART( 5, SIZE)                                             \
-    FILL_PART( 6, SIZE)                                             \
-    FILL_PART( 7, SIZE)                                             \
-  ".if " #SIZE " == 16                                   \n\t"      \
-    FILL_PART( 8, 16)                                               \
-    FILL_PART( 9, 16)                                               \
-    FILL_PART(10, 16)                                               \
-    FILL_PART(11, 16)                                               \
-    FILL_PART(12, 16)                                               \
-    FILL_PART(13, 16)                                               \
-    FILL_PART(14, 16)                                               \
-    FILL_PART(15, 16)                                               \
-  ".endif                                                \n\t"      \
-    : [value]"+&r"(value)                                           \
-    : [dst]"r"((DST))                                               \
-    : "memory"                                                      \
-  );                                                                \
-} while (0)
+#define FILL_8_OR_16(DST, VALUE, SIZE)                                \
+  do {                                                                \
+    int value = (VALUE);                                              \
+    __asm__ volatile(                                                 \
+      "replv.qb   %[value],  %[value]                      \n\t"      \
+      FILL_PART( 0, SIZE)                                             \
+      FILL_PART( 1, SIZE)                                             \
+      FILL_PART( 2, SIZE)                                             \
+      FILL_PART( 3, SIZE)                                             \
+      FILL_PART( 4, SIZE)                                             \
+      FILL_PART( 5, SIZE)                                             \
+      FILL_PART( 6, SIZE)                                             \
+      FILL_PART( 7, SIZE)                                             \
+    ".if " #SIZE " == 16                                   \n\t"      \
+      FILL_PART( 8, 16)                                               \
+      FILL_PART( 9, 16)                                               \
+      FILL_PART(10, 16)                                               \
+      FILL_PART(11, 16)                                               \
+      FILL_PART(12, 16)                                               \
+      FILL_PART(13, 16)                                               \
+      FILL_PART(14, 16)                                               \
+      FILL_PART(15, 16)                                               \
+    ".endif                                                \n\t"      \
+      : [value]"+&r"(value)                                           \
+      : [dst]"r"((DST))                                               \
+      : "memory"                                                      \
+    );                                                                \
+  } while (0)
+// clang-format on

-#define VERTICAL_PRED(DST, TOP, SIZE)                                          \
-static WEBP_INLINE void VerticalPred##SIZE(                                    \
-    uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) {        \
-  int j;                                                                       \
-  if ((TOP)) {                                                                 \
-    for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE));       \
-  } else {                                                                     \
-    FILL_8_OR_16((DST), 127, (SIZE));                                          \
-  }                                                                            \
-}
+#define VERTICAL_PRED(DST, TOP, SIZE)                                      \
+  static WEBP_INLINE void VerticalPred##SIZE(                              \
+      uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(TOP)) {    \
+    int j;                                                                 \
+    if ((TOP)) {                                                           \
+      for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
+    } else {                                                               \
+      FILL_8_OR_16((DST), 127, (SIZE));                                    \
+    }                                                                      \
+  }

 VERTICAL_PRED(dst, top, 8)
 VERTICAL_PRED(dst, top, 16)

 #undef VERTICAL_PRED

-#define HORIZONTAL_PRED(DST, LEFT, SIZE)                                       \
-static WEBP_INLINE void HorizontalPred##SIZE(                                  \
-    uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) {       \
-  if (LEFT) {                                                                  \
-    int j;                                                                     \
-    for (j = 0; j < (SIZE); ++j) {                                             \
-      memset((DST) + j * BPS, (LEFT)[j], (SIZE));                              \
-    }                                                                          \
-  } else {                                                                     \
-    FILL_8_OR_16((DST), 129, (SIZE));                                          \
-  }                                                                            \
-}
+#define HORIZONTAL_PRED(DST, LEFT, SIZE)                                 \
+  static WEBP_INLINE void HorizontalPred##SIZE(                          \
+      uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(LEFT)) { \
+    if (LEFT) {                                                          \
+      int j;                                                             \
+      for (j = 0; j < (SIZE); ++j) {                                     \
+        memset((DST) + j * BPS, (LEFT)[j], (SIZE));                      \
+      }                                                                  \
+    } else {                                                             \
+      FILL_8_OR_16((DST), 129, (SIZE));                                  \
+    }                                                                    \
+  }

 HORIZONTAL_PRED(dst, left, 8)
 HORIZONTAL_PRED(dst, left, 16)

 #undef HORIZONTAL_PRED

-#define CLIPPING()                                                             \
-  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"                 \
-  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"                 \
-  "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"                 \
-  "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"                 \
-  "addu.ph         %[temp2],   %[temp2],   %[leftY_1]    \n\t"                 \
-  "addu.ph         %[temp0],   %[temp0],   %[leftY_1]    \n\t"                 \
-  "addu.ph         %[temp3],   %[temp3],   %[leftY_1]    \n\t"                 \
-  "addu.ph         %[temp1],   %[temp1],   %[leftY_1]    \n\t"                 \
-  "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"                 \
-  "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"                 \
-  "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"                 \
-  "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"                 \
-  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"                 \
+#define CLIPPING()                                             \
+  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t" \
+  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t" \
+  "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t" \
+  "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t" \
+  "addu.ph         %[temp2],   %[temp2],   %[leftY_1]    \n\t" \
+  "addu.ph         %[temp0],   %[temp0],   %[leftY_1]    \n\t" \
+  "addu.ph         %[temp3],   %[temp3],   %[leftY_1]    \n\t" \
+  "addu.ph         %[temp1],   %[temp1],   %[leftY_1]    \n\t" \
+  "shll_s.ph       %[temp2],   %[temp2],   7             \n\t" \
+  "shll_s.ph       %[temp0],   %[temp0],   7             \n\t" \
+  "shll_s.ph       %[temp3],   %[temp3],   7             \n\t" \
+  "shll_s.ph       %[temp1],   %[temp1],   7             \n\t" \
+  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t" \
  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"

-#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE) do {                              \
-  int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y];                            \
-  int temp0, temp1, temp2, temp3;                                              \
-  __asm__ volatile (                                                           \
-    "replv.ph        %[leftY_1], %[leftY_1]              \n\t"                 \
-    "ulw             %[temp0],   0(%[top])               \n\t"                 \
-    "ulw             %[temp1],   4(%[top])               \n\t"                 \
-    "subu.ph         %[leftY_1], %[leftY_1], %[left_1]   \n\t"                 \
-    CLIPPING()                                                                 \
-    "usw             %[temp0],   0(%[dst])               \n\t"                 \
-    "usw             %[temp1],   4(%[dst])               \n\t"                 \
-  ".if " #SIZE " == 16                                   \n\t"                 \
-    "ulw             %[temp0],   8(%[top])               \n\t"                 \
-    "ulw             %[temp1],   12(%[top])              \n\t"                 \
-    CLIPPING()                                                                 \
-    "usw             %[temp0],   8(%[dst])               \n\t"                 \
-    "usw             %[temp1],   12(%[dst])              \n\t"                 \
-  ".endif                                                \n\t"                 \
-    : [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),       \
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                                 \
-    : [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST))                    \
-    : "memory"                                                                 \
-  );                                                                           \
-} while (0)
+// clang-format off
+#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE)                                   \
+  do {                                                                         \
+    int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y];                          \
+    int temp0, temp1, temp2, temp3;                                            \
+    __asm__ volatile(                                                          \
+      "replv.ph        %[leftY_1], %[leftY_1]              \n\t"               \
+      "ulw             %[temp0],   0(%[top])               \n\t"               \
+      "ulw             %[temp1],   4(%[top])               \n\t"               \
+      "subu.ph         %[leftY_1], %[leftY_1], %[left_1]   \n\t"               \
+      CLIPPING()                                                               \
+      "usw             %[temp0],   0(%[dst])               \n\t"               \
+      "usw             %[temp1],   4(%[dst])               \n\t"               \
+    ".if " #SIZE " == 16                                   \n\t"               \
+      "ulw             %[temp0],   8(%[top])               \n\t"               \
+      "ulw             %[temp1],   12(%[top])              \n\t"               \
+      CLIPPING()                                                               \
+      "usw             %[temp0],   8(%[dst])               \n\t"               \
+      "usw             %[temp1],   12(%[dst])              \n\t"               \
+    ".endif                                                \n\t"               \
+      : [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),     \
+        [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                               \
+      : [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST))                  \
+      : "memory"                                                               \
+    );                                                                         \
+  } while (0)
+// clang-format on

-#define CLIP_TO_DST(DST, LEFT, TOP, SIZE) do {                                 \
-  int y;                                                                       \
-  const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1];                     \
-  for (y = 0; y < (SIZE); ++y) {                                               \
-    CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE));                              \
-    (DST) += BPS;                                                              \
-  }                                                                            \
-} while (0)
+#define CLIP_TO_DST(DST, LEFT, TOP, SIZE)                    \
+  do {                                                       \
+    int y;                                                   \
+    const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1]; \
+    for (y = 0; y < (SIZE); ++y) {                           \
+      CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE));          \
+      (DST) += BPS;                                          \
+    }                                                        \
+  } while (0)

+// clang-format off
 #define TRUE_MOTION(DST, LEFT, TOP, SIZE)                                      \
 static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST),         \
                                         const uint8_t* WEBP_RESTRICT (LEFT),  \
@@ -476,6 +488,7 @@ static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST),         \
    }                                                                          \
  }                                                                            \
 }
+// clang-format on

 TRUE_MOTION(dst, left, top, 8)
 TRUE_MOTION(dst, left, top, 16)
@@ -556,41 +569,40 @@ static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
  int temp0, temp1, temp2, temp3;

  __asm__ volatile(
-    "beqz        %[top],   2f                  \n\t"
-    "ulw         %[temp0], 0(%[top])           \n\t"
-    "ulw         %[temp1], 4(%[top])           \n\t"
-    "raddu.w.qb  %[temp0], %[temp0]            \n\t"
-    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
-    "addu        %[DC],    %[temp0], %[temp1]  \n\t"
-    "move        %[DC1],   %[DC]               \n\t"
-    "beqz        %[left],  1f                  \n\t"
-    "ulw         %[temp2], 0(%[left])          \n\t"
-    "ulw         %[temp3], 4(%[left])          \n\t"
-    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
-    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
-    "addu        %[DC1],   %[temp2], %[temp3]  \n\t"
-  "1:                                          \n\t"
-    "addu        %[DC],    %[DC],    %[DC1]    \n\t"
-    "j           3f                            \n\t"
-  "2:                                          \n\t"
-    "beqz        %[left],  4f                  \n\t"
-    "ulw         %[temp2], 0(%[left])          \n\t"
-    "ulw         %[temp3], 4(%[left])          \n\t"
-    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
-    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
-    "addu        %[DC],    %[temp2], %[temp3]  \n\t"
-    "addu        %[DC],    %[DC],    %[DC]     \n\t"
-  "3:                                          \n\t"
-    "shra_r.w    %[DC], %[DC], 4               \n\t"
-    "j           5f                            \n\t"
-  "4:                                          \n\t"
-    "li          %[DC], 0x80                   \n\t"
-  "5:                                          \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
-    : [left]"r"(left), [top]"r"(top)
-    : "memory"
-  );
+      "beqz        %[top],   2f                  \n\t"
+      "ulw         %[temp0], 0(%[top])           \n\t"
+      "ulw         %[temp1], 4(%[top])           \n\t"
+      "raddu.w.qb  %[temp0], %[temp0]            \n\t"
+      "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+      "addu        %[DC],    %[temp0], %[temp1]  \n\t"
+      "move        %[DC1],   %[DC]               \n\t"
+      "beqz        %[left],  1f                  \n\t"
+      "ulw         %[temp2], 0(%[left])          \n\t"
+      "ulw         %[temp3], 4(%[left])          \n\t"
+      "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+      "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+      "addu        %[DC1],   %[temp2], %[temp3]  \n\t"
+      "1:                                          \n\t"
+      "addu        %[DC],    %[DC],    %[DC1]    \n\t"
+      "j           3f                            \n\t"
+      "2:                                          \n\t"
+      "beqz        %[left],  4f                  \n\t"
+      "ulw         %[temp2], 0(%[left])          \n\t"
+      "ulw         %[temp3], 4(%[left])          \n\t"
+      "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+      "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+      "addu        %[DC],    %[temp2], %[temp3]  \n\t"
+      "addu        %[DC],    %[DC],    %[DC]     \n\t"
+      "3:                                          \n\t"
+      "shra_r.w    %[DC], %[DC], 4               \n\t"
+      "j           5f                            \n\t"
+      "4:                                          \n\t"
+      "li          %[DC], 0x80                   \n\t"
+      "5:                                          \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [DC] "=&r"(DC),
+        [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [DC1] "=&r"(DC1)
+      : [left] "r"(left), [top] "r"(top)
+      : "memory");

  FILL_8_OR_16(dst, DC, 8);
 }
@@ -619,7 +631,7 @@ static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
  const int c35 = 0xff00ff;
-  __asm__ volatile (
+  __asm__ volatile(
    "lbu              %[temp1],  0(%[top])                     \n\t"
    "lbu              %[a10],    1(%[top])                     \n\t"
    "lbu              %[temp2],  2(%[top])                     \n\t"
@@ -790,7 +802,7 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   -4(%[top])              \n\t"
    "ulw              %[temp1],   0(%[top])               \n\t"
    "preceu.ph.qbl    %[temp2],   %[temp0]                \n\t"
@@ -887,7 +899,7 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   0(%[top])               \n\t"
    "ulw              %[temp1],   4(%[top])               \n\t"
    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
@@ -936,7 +948,7 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
 static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw              %[temp0],   -5(%[top])              \n\t"
    "ulw              %[temp1],   -1(%[top])              \n\t"
    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
@@ -983,7 +995,7 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {

 static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  __asm__ volatile (
+  __asm__ volatile(
    "ulw             %[temp0],   -5(%[top])              \n\t"
    "preceu.ph.qbl   %[temp1],   %[temp0]                \n\t"
    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
@@ -1071,6 +1083,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,

 #if !defined(WORK_AROUND_GCC)

+// clang-format off
 #define GET_SSE_INNER(A)                                                  \
  "lw               %[temp0],    " #A "(%[a])                  \n\t"      \
  "lw               %[temp1],    " #A "(%[b])                  \n\t"      \
@@ -1082,41 +1095,41 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
  "subq.ph          %[temp0],    %[temp0],    %[temp1]         \n\t"      \
  "dpa.w.ph         $ac0,        %[temp2],    %[temp2]         \n\t"      \
  "dpa.w.ph         $ac0,        %[temp0],    %[temp0]         \n\t"
+// clang-format on

-#define GET_SSE(A, B, C, D)               \
-  GET_SSE_INNER(A)                        \
-  GET_SSE_INNER(B)                        \
-  GET_SSE_INNER(C)                        \
+#define GET_SSE(A, B, C, D) \
+  GET_SSE_INNER(A)          \
+  GET_SSE_INNER(B)          \
+  GET_SSE_INNER(C)          \
  GET_SSE_INNER(D)

 static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                              const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
-    "mult   $zero,    $zero                            \n\t"
-    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
-    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
-    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
-    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
-    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
-    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
-    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
-    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
-    GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
-    GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
-    GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
-    GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
-    GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
-    GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
-    GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
-    GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
-    "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+  __asm__ volatile(
+      "mult   $zero,    $zero                            \n\t"      //
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)      //
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)      //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)      //
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)      //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)      //
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)      //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)      //
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)      //
+      GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)      //
+      GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)      //
+      GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)  //
+      GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)  //
+      GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)  //
+      GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)  //
+      GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)  //
+      GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)  //
+      "mflo   %[count]                                   \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -1124,22 +1137,21 @@ static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                             const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
-    "mult   $zero,    $zero                            \n\t"
-    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
-    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
-    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
-    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
-    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
-    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
-    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
-    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
-    "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+  __asm__ volatile(
+      "mult   $zero,    $zero                            \n\t"  //
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)  //
+      GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)  //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)  //
+      GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)  //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)  //
+      GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)  //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)  //
+      GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)  //
+      "mflo   %[count]                                   \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -1147,18 +1159,17 @@ static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
-    "mult   $zero,    $zero                            \n\t"
-    GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
-    GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
-    GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
-    GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
-    "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+  __asm__ volatile(
+      "mult   $zero,    $zero                            \n\t"  //
+      GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)       //
+      GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)       //
+      GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)       //
+      GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)       //
+      "mflo   %[count]                                   \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -1166,15 +1177,14 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
                            const uint8_t* WEBP_RESTRICT b) {
  int count;
  int temp0, temp1, temp2, temp3;
-  __asm__ volatile (
-    "mult   $zero,    $zero                            \n\t"
-    GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
-    "mflo   %[count]                                   \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [count]"=&r"(count)
-    : [a]"r"(a), [b]"r"(b)
-    : "memory", "hi", "lo"
-  );
+  __asm__ volatile(
+      "mult   $zero,    $zero                            \n\t"  //
+      GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)               //
+      "mflo   %[count]                                   \n\t"
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [count] "=&r"(count)
+      : [a] "r"(a), [b] "r"(b)
+      : "memory", "hi", "lo");
  return count;
 }

@@ -1200,6 +1210,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
 // K - offset in bytes (kZigzag[n] * 4)
 // N - offset in bytes (n * 2)
 // N1 - offset in bytes ((n + 1) * 2)
+// clang-format off
 #define QUANTIZE_ONE(J, K, N, N1)                                         \
  "ulw         %[temp1],     " #J "(%[ppin])                 \n\t"        \
  "ulw         %[temp2],     " #J "(%[ppsharpen])            \n\t"        \
@@ -1285,44 +1296,42 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
  "sh          $0,           " #N1 "(%[pout])                \n\t"        \
  "usw         $0,           " #J "(%[ppin])                 \n\t"        \
 "3:                                                          \n\t"
+// clang-format on

 static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
                                   const VP8Matrix* WEBP_RESTRICT const mtx) {
-  int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
  int sign, coeff, level;
  int max_level = MAX_LEVEL;
  int max_level1 = max_level << 16 | max_level;
  int ret = 0;

-  int16_t* ppin             = &in[0];
-  int16_t* pout             = &out[0];
+  int16_t* ppin = &in[0];
+  int16_t* pout = &out[0];
  const uint16_t* ppsharpen = &mtx->sharpen[0];
  const uint32_t* ppzthresh = &mtx->zthresh[0];
-  const uint16_t* ppq       = &mtx->q[0];
-  const uint16_t* ppiq      = &mtx->iq[0];
-  const uint32_t* ppbias    = &mtx->bias[0];
+  const uint16_t* ppq = &mtx->q[0];
+  const uint16_t* ppiq = &mtx->iq[0];
+  const uint32_t* ppbias = &mtx->bias[0];

-  __asm__ volatile (
-    QUANTIZE_ONE( 0,  0,  0,  2)
-    QUANTIZE_ONE( 4,  8, 10, 12)
-    QUANTIZE_ONE( 8, 16,  4,  8)
-    QUANTIZE_ONE(12, 24, 14, 24)
-    QUANTIZE_ONE(16, 32,  6, 16)
-    QUANTIZE_ONE(20, 40, 22, 26)
-    QUANTIZE_ONE(24, 48, 18, 20)
-    QUANTIZE_ONE(28, 56, 28, 30)
+  __asm__ volatile(
+      QUANTIZE_ONE(0, 0, 0, 2)      //
+      QUANTIZE_ONE(4, 8, 10, 12)    //
+      QUANTIZE_ONE(8, 16, 4, 8)     //
+      QUANTIZE_ONE(12, 24, 14, 24)  //
+      QUANTIZE_ONE(16, 32, 6, 16)   //
+      QUANTIZE_ONE(20, 40, 22, 26)  //
+      QUANTIZE_ONE(24, 48, 18, 20)  //
+      QUANTIZE_ONE(28, 56, 28, 30)  //

-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
-      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
-      [level]"=&r"(level), [temp6]"=&r"(temp6), [ret]"+&r"(ret)
-    : [ppin]"r"(ppin), [pout]"r"(pout), [max_level1]"r"(max_level1),
-      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
-      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
-      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
-    : "memory", "hi", "lo"
-  );
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level),
+        [temp6] "=&r"(temp6), [ret] "+&r"(ret)
+      : [ppin] "r"(ppin), [pout] "r"(pout), [max_level1] "r"(max_level1),
+        [ppiq] "r"(ppiq), [max_level] "r"(max_level), [ppbias] "r"(ppbias),
+        [ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
+      : "memory", "hi", "lo");

  return (ret != 0);
 }
@@ -1330,7 +1339,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
                                     const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  nz  = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
@@ -1341,6 +1350,7 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
 // temp0..temp7 holds tmp[0]..tmp[15]
 // A, B, C, D - offset in bytes to load from in buffer
 // TEMP0, TEMP1 - registers for corresponding tmp elements
+// clang-format off
 #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1)                          \
  "lh              %[" #TEMP0 "],  " #A "(%[in])            \n\t"              \
  "lh              %[" #TEMP1 "],  " #B "(%[in])            \n\t"              \
@@ -1373,26 +1383,26 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
  "usw             %[" #TEMP2 "],  " #B "(%[out])                 \n\t"        \
  "usw             %[" #TEMP4 "],  " #C "(%[out])                 \n\t"        \
  "usw             %[" #TEMP6 "],  " #D "(%[out])                 \n\t"
+// clang-format on

 static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
                                    int16_t* WEBP_RESTRICT out) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;

-  __asm__ volatile (
-    HORIZONTAL_PASS_WHT(  0,  32,  64,  96, temp0, temp1)
-    HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)
-    HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)
-    HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)
-    VERTICAL_PASS_WHT(0,  8, 16, 24, temp0, temp2, temp4, temp6)
-    VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
-      [temp9]"=&r"(temp9)
-    : [in]"r"(in), [out]"r"(out)
-    : "memory"
-  );
+  __asm__ volatile(
+      HORIZONTAL_PASS_WHT(0, 32, 64, 96, temp0, temp1)              //
+      HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)         //
+      HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)         //
+      HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)         //
+      VERTICAL_PASS_WHT(0, 8, 16, 24, temp0, temp2, temp4, temp6)   //
+      VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)  //
+      : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+        [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+        [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
+        [temp9] "=&r"(temp9)
+      : [in] "r"(in), [out] "r"(out)
+      : "memory");
 }

 #undef VERTICAL_PASS_WHT
@@ -1401,6 +1411,7 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
 // macro for converting coefficients to bin
 // convert 8 coeffs at time
 // A, B, C, D - offsets in bytes to load from out buffer
+// clang-format off
 #define CONVERT_COEFFS_TO_BIN(A, B, C, D)                                      \
  "ulw        %[temp0],  " #A "(%[out])                \n\t"                   \
  "ulw        %[temp1],  " #B "(%[out])                \n\t"                   \
@@ -1466,12 +1477,13 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
  "lw         %[temp8],  0(%[temp3])                   \n\t"                   \
  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
  "sw         %[temp8],  0(%[temp3])                   \n\t"
+// clang-format on

 static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
                                       int start_block, int end_block,
                                       VP8Histogram* const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
@@ -1480,15 +1492,14 @@ static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

    // Convert coefficients to bin.
-    __asm__ volatile (
-      CONVERT_COEFFS_TO_BIN( 0,  4,  8, 12)
-      CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)
-      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-        [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
-        [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
-      : [dist]"r"(distribution), [out]"r"(out), [max_coeff]"r"(max_coeff)
-      : "memory"
-    );
+    __asm__ volatile(
+        CONVERT_COEFFS_TO_BIN(0, 4, 8, 12)     //
+        CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)  //
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+          [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
+        : [dist] "r"(distribution), [out] "r"(out), [max_coeff] "r"(max_coeff)
+        : "memory");
  }
  VP8SetHistogramData(distribution, histo);
 }
--- a/src/dsp/enc_msa.c
+++ b/src/dsp/enc_msa.c
@@ -16,30 +16,32 @@
 #if defined(WEBP_USE_MSA)

 #include <stdlib.h>
+
 #include "src/dsp/msa_macro.h"
 #include "src/enc/vp8i_enc.h"

 //------------------------------------------------------------------------------
 // Transforms

-#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
-  v4i32 a1_m, b1_m, c1_m, d1_m;                                     \
-  const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);              \
-  const v4i32 sinpi8sqrt2 = __msa_fill_w(35468);                    \
-  v4i32 c_tmp1_m = in1 * sinpi8sqrt2;                               \
-  v4i32 c_tmp2_m = in3 * cospi8sqrt2minus1;                         \
-  v4i32 d_tmp1_m = in1 * cospi8sqrt2minus1;                         \
-  v4i32 d_tmp2_m = in3 * sinpi8sqrt2;                               \
-                                                                    \
-  ADDSUB2(in0, in2, a1_m, b1_m);                                    \
-  SRAI_W2_SW(c_tmp1_m, c_tmp2_m, 16);                               \
-  c_tmp2_m = c_tmp2_m + in3;                                        \
-  c1_m = c_tmp1_m - c_tmp2_m;                                       \
-  SRAI_W2_SW(d_tmp1_m, d_tmp2_m, 16);                               \
-  d_tmp1_m = d_tmp1_m + in1;                                        \
-  d1_m = d_tmp1_m + d_tmp2_m;                                       \
-  BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3);      \
-} while (0)
+#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
+  do {                                                           \
+    v4i32 a1_m, b1_m, c1_m, d1_m;                                \
+    const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091);         \
+    const v4i32 sinpi8sqrt2 = __msa_fill_w(35468);               \
+    v4i32 c_tmp1_m = in1 * sinpi8sqrt2;                          \
+    v4i32 c_tmp2_m = in3 * cospi8sqrt2minus1;                    \
+    v4i32 d_tmp1_m = in1 * cospi8sqrt2minus1;                    \
+    v4i32 d_tmp2_m = in3 * sinpi8sqrt2;                          \
+                                                                 \
+    ADDSUB2(in0, in2, a1_m, b1_m);                               \
+    SRAI_W2_SW(c_tmp1_m, c_tmp2_m, 16);                          \
+    c_tmp2_m = c_tmp2_m + in3;                                   \
+    c1_m = c_tmp1_m - c_tmp2_m;                                  \
+    SRAI_W2_SW(d_tmp1_m, d_tmp2_m, 16);                          \
+    d_tmp1_m = d_tmp1_m + in1;                                   \
+    d1_m = d_tmp1_m + d_tmp2_m;                                  \
+    BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
+  } while (0)

 static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
                                      const int16_t* WEBP_RESTRICT in,
@@ -48,7 +50,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
  v16i8 dest0, dest1, dest2, dest3;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};

  LD_SH2(in, 8, input0, input1);
  UNPCK_SH_SW(input0, in0, in1);
@@ -59,10 +61,10 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  LD_SB4(ref, BPS, dest0, dest1, dest2, dest3);
-  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
-             res0, res1, res2, res3);
-  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
-             res0, res1, res2, res3);
+  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
+             res2, res3);
+  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
+             res3);
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -86,13 +88,13 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
  uint32_t in0, in1, in2, in3;
  v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  v8i16 t0, t1, t2, t3;
-  v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
-  const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
-  const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
-  const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
-  const v8i16 mask3 = { 0, 4, 1, 5, 2, 6, 3, 7 };
-  const v8i16 cnst0 = { 2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352 };
-  const v8i16 cnst1 = { 5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217 };
+  v16u8 srcl0, srcl1, src0 = {0}, src1 = {0};
+  const v8i16 mask0 = {0, 4, 8, 12, 1, 5, 9, 13};
+  const v8i16 mask1 = {3, 7, 11, 15, 2, 6, 10, 14};
+  const v8i16 mask2 = {4, 0, 5, 1, 6, 2, 7, 3};
+  const v8i16 mask3 = {0, 4, 1, 5, 2, 6, 3, 7};
+  const v8i16 cnst0 = {2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352};
+  const v8i16 cnst1 = {5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217};

  LW4(src, BPS, in0, in1, in2, in3);
  INSERT_W4_UB(in0, in1, in2, in3, src0);
@@ -136,29 +138,29 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,

 static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
                              int16_t* WEBP_RESTRICT out) {
-  v8i16 in0 = { 0 };
-  v8i16 in1 = { 0 };
+  v8i16 in0 = {0};
+  v8i16 in1 = {0};
  v8i16 tmp0, tmp1, tmp2, tmp3;
  v8i16 out0, out1;
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};

-  in0 = __msa_insert_h(in0, 0, in[  0]);
-  in0 = __msa_insert_h(in0, 1, in[ 64]);
+  in0 = __msa_insert_h(in0, 0, in[0]);
+  in0 = __msa_insert_h(in0, 1, in[64]);
  in0 = __msa_insert_h(in0, 2, in[128]);
  in0 = __msa_insert_h(in0, 3, in[192]);
-  in0 = __msa_insert_h(in0, 4, in[ 16]);
-  in0 = __msa_insert_h(in0, 5, in[ 80]);
+  in0 = __msa_insert_h(in0, 4, in[16]);
+  in0 = __msa_insert_h(in0, 5, in[80]);
  in0 = __msa_insert_h(in0, 6, in[144]);
  in0 = __msa_insert_h(in0, 7, in[208]);
-  in1 = __msa_insert_h(in1, 0, in[ 48]);
+  in1 = __msa_insert_h(in1, 0, in[48]);
  in1 = __msa_insert_h(in1, 1, in[112]);
  in1 = __msa_insert_h(in1, 2, in[176]);
  in1 = __msa_insert_h(in1, 3, in[240]);
-  in1 = __msa_insert_h(in1, 4, in[ 32]);
-  in1 = __msa_insert_h(in1, 5, in[ 96]);
+  in1 = __msa_insert_h(in1, 4, in[32]);
+  in1 = __msa_insert_h(in1, 5, in[96]);
  in1 = __msa_insert_h(in1, 6, in[160]);
  in1 = __msa_insert_h(in1, 7, in[224]);
  ADDSUB2(in0, in1, tmp0, tmp1);
@@ -176,14 +178,14 @@ static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
                          const uint16_t* WEBP_RESTRICT w) {
  int sum;
  uint32_t in0_m, in1_m, in2_m, in3_m;
-  v16i8 src0 = { 0 };
+  v16i8 src0 = {0};
  v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
  v4i32 dst0, dst1;
-  const v16i8 zero = { 0 };
-  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
-  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
-  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
-  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
+  const v16i8 zero = {0};
+  const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
+  const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
+  const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
+  const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};

  LW4(in, BPS, in0_m, in1_m, in2_m, in3_m);
  INSERT_W4_SB(in0_m, in1_m, in2_m, in3_m, src0);
@@ -233,14 +235,14 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
                                 int start_block, int end_block,
                                 VP8Histogram* const histo) {
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
    {
      int k;
      v8i16 coeff0, coeff1;
-      const v8i16 zero = { 0 };
+      const v8i16 zero = {0};
      const v8i16 max_coeff_thr = __msa_ldi_h(MAX_COEFF_THRESH);
      LD_SH2(&out[0], 8, coeff0, coeff1);
      coeff0 = __msa_add_a_h(coeff0, zero);
@@ -269,7 +271,7 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
 // vertical
 static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A1 = { 0 };
+  const v16u8 A1 = {0};
  const uint64_t val_m = LD(top - 1);
  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
  const v16u8 B = SLDI_UB(A, A, 1);
@@ -307,7 +309,7 @@ static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,

 static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A2 = { 0 };
+  const v16u8 A2 = {0};
  const uint64_t val_m = LD(top - 5);
  const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
  const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
@@ -328,7 +330,7 @@ static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,

 static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16u8 A1 = { 0 };
+  const v16u8 A1 = {0};
  const uint64_t val_m = LD(top);
  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
  const v16u8 B = SLDI_UB(A, A, 1);
@@ -360,13 +362,13 @@ static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
  DST(0, 0) = DST(1, 2) = AVG2(X, A);
  DST(1, 0) = DST(2, 2) = AVG2(A, B);
  DST(2, 0) = DST(3, 2) = AVG2(B, C);
-  DST(3, 0)             = AVG2(C, D);
-  DST(0, 3) =             AVG3(K, J, I);
-  DST(0, 2) =             AVG3(J, I, X);
+  DST(3, 0) = AVG2(C, D);
+  DST(0, 3) = AVG3(K, J, I);
+  DST(0, 2) = AVG3(J, I, X);
  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
-  DST(3, 1) =             AVG3(B, C, D);
+  DST(3, 1) = AVG3(B, C, D);
 }

 static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
@@ -379,16 +381,16 @@ static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
  const int F = top[5];
  const int G = top[6];
  const int H = top[7];
-  DST(0, 0) =             AVG2(A, B);
+  DST(0, 0) = AVG2(A, B);
  DST(1, 0) = DST(0, 2) = AVG2(B, C);
  DST(2, 0) = DST(1, 2) = AVG2(C, D);
  DST(3, 0) = DST(2, 2) = AVG2(D, E);
-  DST(0, 1) =             AVG3(A, B, C);
+  DST(0, 1) = AVG3(A, B, C);
  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
-              DST(3, 2) = AVG3(E, F, G);
-              DST(3, 3) = AVG3(F, G, H);
+  DST(3, 2) = AVG3(E, F, G);
+  DST(3, 3) = AVG3(F, G, H);
 }

 static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
@@ -397,14 +399,13 @@ static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
  const int J = top[-3];
  const int K = top[-4];
  const int L = top[-5];
-  DST(0, 0) =             AVG2(I, J);
+  DST(0, 0) = AVG2(I, J);
  DST(2, 0) = DST(0, 1) = AVG2(J, K);
  DST(2, 1) = DST(0, 2) = AVG2(K, L);
-  DST(1, 0) =             AVG3(I, J, K);
+  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
-  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }

 static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
@@ -420,25 +421,25 @@ static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
  DST(0, 0) = DST(2, 1) = AVG2(I, X);
  DST(0, 1) = DST(2, 2) = AVG2(J, I);
  DST(0, 2) = DST(2, 3) = AVG2(K, J);
-  DST(0, 3)             = AVG2(L, K);
-  DST(3, 0)             = AVG3(A, B, C);
-  DST(2, 0)             = AVG3(X, A, B);
+  DST(0, 3) = AVG2(L, K);
+  DST(3, 0) = AVG3(A, B, C);
+  DST(2, 0) = AVG3(X, A, B);
  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
-  DST(1, 3)             = AVG3(L, K, J);
+  DST(1, 3) = AVG3(L, K, J);
 }

 static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
                            const uint8_t* WEBP_RESTRICT top) {
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
  const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
  const v8i16 L1 = (v8i16)__msa_fill_h(top[-3]);
  const v8i16 L2 = (v8i16)__msa_fill_h(top[-4]);
  const v8i16 L3 = (v8i16)__msa_fill_h(top[-5]);
  const v16u8 T1 = LD_UB(top);
-  const v8i16 T  = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
+  const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
  const v8i16 d = T - TL;
  v8i16 r0, r1, r2, r3;
  ADD4(d, L0, d, L1, d, L2, d, L3, r0, r1, r2, r3);
@@ -466,10 +467,11 @@ static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,

 // luma 16x16 prediction

-#define STORE16x16(out, dst) do {                                        \
-    ST_UB8(out, out, out, out, out, out, out, out, dst + 0 * BPS, BPS);  \
-    ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);  \
-} while (0)
+#define STORE16x16(out, dst)                                            \
+  do {                                                                  \
+    ST_UB8(out, out, out, out, out, out, out, out, dst + 0 * BPS, BPS); \
+    ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
+  } while (0)

 static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
                                          const uint8_t* WEBP_RESTRICT top) {
@@ -508,7 +510,7 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
    if (top != NULL) {
      int j;
      v8i16 d1, d2;
-      const v16i8 zero = { 0 };
+      const v16i8 zero = {0};
      const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
      const v16u8 T = LD_UB(top);
      ILVRL_B2_SH(zero, T, d1, d2);
@@ -554,17 +556,17 @@ static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
    const v8u16 dctemp = dctop + dcleft;
    DC = HADD_UH_U32(dctemp);
    DC = (DC + 16) >> 5;
-  } else if (left != NULL) {   // left but no top
+  } else if (left != NULL) {  // left but no top
    const v16u8 rleft = LD_UB(left);
    const v8u16 dcleft = __msa_hadd_u_h(rleft, rleft);
    DC = HADD_UH_U32(dcleft);
    DC = (DC + DC + 16) >> 5;
-  } else if (top != NULL) {   // top but no left
+  } else if (top != NULL) {  // top but no left
    const v16u8 rtop = LD_UB(top);
    const v8u16 dctop = __msa_hadd_u_h(rtop, rtop);
    DC = HADD_UH_U32(dctop);
    DC = (DC + DC + 16) >> 5;
-  } else {   // no top, no left, nothing.
+  } else {  // no top, no left, nothing.
    DC = 0x80;
  }
  out = (v16u8)__msa_fill_b(DC);
@@ -582,21 +584,23 @@ static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,

 // Chroma 8x8 prediction

-#define CALC_DC8(in, out) do {                              \
-  const v8u16 temp0 = __msa_hadd_u_h(in, in);               \
-  const v4u32 temp1 = __msa_hadd_u_w(temp0, temp0);         \
-  const v2i64 temp2 = (v2i64)__msa_hadd_u_d(temp1, temp1);  \
-  const v2i64 temp3 = __msa_splati_d(temp2, 1);             \
-  const v2i64 temp4 = temp3 + temp2;                        \
-  const v16i8 temp5 = (v16i8)__msa_srari_d(temp4, 4);       \
-  const v2i64 temp6 = (v2i64)__msa_splati_b(temp5, 0);      \
-  out = __msa_copy_s_d(temp6, 0);                           \
-} while (0)
+#define CALC_DC8(in, out)                                    \
+  do {                                                       \
+    const v8u16 temp0 = __msa_hadd_u_h(in, in);              \
+    const v4u32 temp1 = __msa_hadd_u_w(temp0, temp0);        \
+    const v2i64 temp2 = (v2i64)__msa_hadd_u_d(temp1, temp1); \
+    const v2i64 temp3 = __msa_splati_d(temp2, 1);            \
+    const v2i64 temp4 = temp3 + temp2;                       \
+    const v16i8 temp5 = (v16i8)__msa_srari_d(temp4, 4);      \
+    const v2i64 temp6 = (v2i64)__msa_splati_b(temp5, 0);     \
+    out = __msa_copy_s_d(temp6, 0);                          \
+  } while (0)

-#define STORE8x8(out, dst) do {                 \
-  SD4(out, out, out, out, dst + 0 * BPS, BPS);  \
-  SD4(out, out, out, out, dst + 4 * BPS, BPS);  \
-} while (0)
+#define STORE8x8(out, dst)                       \
+  do {                                           \
+    SD4(out, out, out, out, dst + 0 * BPS, BPS); \
+    SD4(out, out, out, out, dst + 4 * BPS, BPS); \
+  } while (0)

 static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
                                        const uint8_t* WEBP_RESTRICT top) {
@@ -640,8 +644,8 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
      int j;
      const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
      const v16u8 T1 = LD_UB(top);
-      const v16i8 zero = { 0 };
-      const v8i16 T  = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
+      const v16i8 zero = {0};
+      const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
      const v8i16 d = T - TL;
      for (j = 0; j < 8; j += 4) {
        uint64_t out0, out1, out2, out3;
@@ -677,21 +681,21 @@ static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
                                  const uint8_t* WEBP_RESTRICT left,
                                  const uint8_t* WEBP_RESTRICT top) {
  uint64_t out;
-  v16u8 src = { 0 };
+  v16u8 src = {0};
  if (top != NULL && left != NULL) {
    const uint64_t left_m = LD(left);
    const uint64_t top_m = LD(top);
    INSERT_D2_UB(left_m, top_m, src);
    CALC_DC8(src, out);
-  } else if (left != NULL) {   // left but no top
+  } else if (left != NULL) {  // left but no top
    const uint64_t left_m = LD(left);
    INSERT_D2_UB(left_m, left_m, src);
    CALC_DC8(src, out);
-  } else if (top != NULL) {   // top but no left
+  } else if (top != NULL) {  // top but no left
    const uint64_t top_m = LD(top);
    INSERT_D2_UB(top_m, top_m, src);
    CALC_DC8(src, out);
-  } else {   // no top, no left, nothing.
+  } else {  // no top, no left, nothing.
    src = (v16u8)__msa_fill_b(0x80);
    out = __msa_copy_s_d((v2i64)src, 0);
  }
@@ -719,27 +723,29 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
 //------------------------------------------------------------------------------
 // Metric

-#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
-  v16u8 tmp0, tmp1;                                                        \
-  v8i16 tmp2, tmp3;                                                        \
-  ILVRL_B2_UB(in0, in1, tmp0, tmp1);                                       \
-  HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                                     \
-  DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1);                         \
-  ILVRL_B2_UB(in2, in3, tmp0, tmp1);                                       \
-  HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                                     \
-  DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                         \
-} while (0)
+#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
+  do {                                                               \
+    v16u8 tmp0, tmp1;                                                \
+    v8i16 tmp2, tmp3;                                                \
+    ILVRL_B2_UB(in0, in1, tmp0, tmp1);                               \
+    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                             \
+    DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1);                 \
+    ILVRL_B2_UB(in2, in3, tmp0, tmp1);                               \
+    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                             \
+    DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                 \
+  } while (0)

-#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do {  \
-  v16u8 tmp0, tmp1;                                                         \
-  v8i16 tmp2, tmp3;                                                         \
-  ILVRL_B2_UB(in0, in1, tmp0, tmp1);                                        \
-  HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                                      \
-  DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1);                         \
-  ILVRL_B2_UB(in2, in3, tmp0, tmp1);                                        \
-  HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                                      \
-  DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                         \
-} while (0)
+#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
+  do {                                                                \
+    v16u8 tmp0, tmp1;                                                 \
+    v8i16 tmp2, tmp3;                                                 \
+    ILVRL_B2_UB(in0, in1, tmp0, tmp1);                                \
+    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                              \
+    DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1);                 \
+    ILVRL_B2_UB(in2, in3, tmp0, tmp1);                                \
+    HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3);                              \
+    DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                 \
+  } while (0)

 static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
                        const uint8_t* WEBP_RESTRICT b) {
@@ -814,7 +820,7 @@ static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
                      const uint8_t* WEBP_RESTRICT b) {
  uint32_t sum = 0;
  uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
-  v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
+  v16u8 src = {0}, ref = {0}, tmp0, tmp1;
  v8i16 diff0, diff1;
  v4i32 out0, out1;

@@ -839,9 +845,9 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  v8i16 in0, in1, sh0, sh1, out0, out1;
  v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
  v4i32 s0, s1, s2, s3, b0, b1, b2, b3, t0, t1, t2, t3;
-  const v8i16 zero = { 0 };
-  const v8i16 zigzag0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
-  const v8i16 zigzag1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
+  const v8i16 zero = {0};
+  const v8i16 zigzag0 = {0, 1, 4, 8, 5, 2, 3, 6};
+  const v8i16 zigzag1 = {9, 12, 13, 10, 7, 11, 14, 15};
  const v8i16 maxlevel = __msa_fill_h(MAX_LEVEL);

  LD_SH2(&in[0], 8, in0, in1);
@@ -852,11 +858,11 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  ILVRL_H2_SH(sh1, tmp5, tmp2, tmp3);
  HADD_SH4_SW(tmp0, tmp1, tmp2, tmp3, s0, s1, s2, s3);
  sign0 = (in0 < zero);
-  sign1 = (in1 < zero);                           // sign
-  LD_SH2(&mtx->iq[0], 8, tmp0, tmp1);             // iq
+  sign1 = (in1 < zero);                // sign
+  LD_SH2(&mtx->iq[0], 8, tmp0, tmp1);  // iq
  ILVRL_H2_SW(zero, tmp0, t0, t1);
  ILVRL_H2_SW(zero, tmp1, t2, t3);
-  LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3);       // bias
+  LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3);  // bias
  MUL4(t0, s0, t1, s1, t2, s2, t3, s3, t0, t1, t2, t3);
  ADD4(b0, t0, b1, t1, b2, t2, b3, t3, b0, b1, b2, b3);
  SRAI_W4_SW(b0, b1, b2, b3, 17);
@@ -868,7 +874,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
  tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
  tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
-  LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3);    // zthresh
+  LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3);  // zthresh
  t0 = (s0 > t0);
  t1 = (s1 > t1);
  t2 = (s2 > t2);
@@ -889,7 +895,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
                               const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@@ -89,8 +89,8 @@ static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
                                          int16x8x2_t* const out) {
  // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
  // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
-  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
-                                                  // b0 d0 b1 d1 b2 d2 ...
+  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);  // a0 c0 a1 c1 a2 c2 ...
+                                                 // b0 d0 b1 d1 b2 d2 ...
  *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
 }

@@ -105,17 +105,17 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
-                                vget_low_s16(rows->val[1]));   // in0 + in8
+                                vget_low_s16(rows->val[1]));  // in0 + in8
  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
-                                vget_low_s16(rows->val[1]));   // in0 - in8
+                                vget_low_s16(rows->val[1]));  // in0 - in8
  // c = kC2 * in4 - kC1 * in12
  // d = kC1 * in4 + kC2 * in12
  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
-  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
-  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
-  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
-  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t D0 = vcombine_s16(a, b);     // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);     // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);     // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);  // a-d | b-c
  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
  Transpose8x2_NEON(E0, E1, rows);
 }
@@ -136,118 +136,118 @@ static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
                               const int16_t* WEBP_RESTRICT in,
                               uint8_t* WEBP_RESTRICT dst) {
  const int kBPS = BPS;
-  const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
+  const int16_t kC1C2[] = {kC1, kC2, 0, 0};

-  __asm__ volatile (
-    "vld1.16         {q1, q2}, [%[in]]           \n"
-    "vld1.16         {d0}, [%[kC1C2]]            \n"
+  __asm__ volatile(
+      "vld1.16         {q1, q2}, [%[in]]           \n"
+      "vld1.16         {d0}, [%[kC1C2]]            \n"

-    // d2: in[0]
-    // d3: in[8]
-    // d4: in[4]
-    // d5: in[12]
-    "vswp            d3, d4                      \n"
+      // d2: in[0]
+      // d3: in[8]
+      // d4: in[4]
+      // d5: in[12]
+      "vswp            d3, d4                      \n"

-    // q8 = {in[4], in[12]} * kC1 * 2 >> 16
-    // q9 = {in[4], in[12]} * kC2 >> 16
-    "vqdmulh.s16     q8, q2, d0[0]               \n"
-    "vqdmulh.s16     q9, q2, d0[1]               \n"
+      // q8 = {in[4], in[12]} * kC1 * 2 >> 16
+      // q9 = {in[4], in[12]} * kC2 >> 16
+      "vqdmulh.s16     q8, q2, d0[0]               \n"
+      "vqdmulh.s16     q9, q2, d0[1]               \n"

-    // d22 = a = in[0] + in[8]
-    // d23 = b = in[0] - in[8]
-    "vqadd.s16       d22, d2, d3                 \n"
-    "vqsub.s16       d23, d2, d3                 \n"
+      // d22 = a = in[0] + in[8]
+      // d23 = b = in[0] - in[8]
+      "vqadd.s16       d22, d2, d3                 \n"
+      "vqsub.s16       d23, d2, d3                 \n"

-    //  q8 = in[4]/[12] * kC1 >> 16
-    "vshr.s16        q8, q8, #1                  \n"
+      //  q8 = in[4]/[12] * kC1 >> 16
+      "vshr.s16        q8, q8, #1                  \n"

-    // Add {in[4], in[12]} back after the multiplication.
-    "vqadd.s16       q8, q2, q8                  \n"
+      // Add {in[4], in[12]} back after the multiplication.
+      "vqadd.s16       q8, q2, q8                  \n"

-    // d20 = c = in[4]*kC2 - in[12]*kC1
-    // d21 = d = in[4]*kC1 + in[12]*kC2
-    "vqsub.s16       d20, d18, d17               \n"
-    "vqadd.s16       d21, d19, d16               \n"
+      // d20 = c = in[4]*kC2 - in[12]*kC1
+      // d21 = d = in[4]*kC1 + in[12]*kC2
+      "vqsub.s16       d20, d18, d17               \n"
+      "vqadd.s16       d21, d19, d16               \n"

-    // d2 = tmp[0] = a + d
-    // d3 = tmp[1] = b + c
-    // d4 = tmp[2] = b - c
-    // d5 = tmp[3] = a - d
-    "vqadd.s16       d2, d22, d21                \n"
-    "vqadd.s16       d3, d23, d20                \n"
-    "vqsub.s16       d4, d23, d20                \n"
-    "vqsub.s16       d5, d22, d21                \n"
+      // d2 = tmp[0] = a + d
+      // d3 = tmp[1] = b + c
+      // d4 = tmp[2] = b - c
+      // d5 = tmp[3] = a - d
+      "vqadd.s16       d2, d22, d21                \n"
+      "vqadd.s16       d3, d23, d20                \n"
+      "vqsub.s16       d4, d23, d20                \n"
+      "vqsub.s16       d5, d22, d21                \n"

-    "vzip.16         q1, q2                      \n"
-    "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"

-    "vswp            d3, d4                      \n"
+      "vswp            d3, d4                      \n"

-    // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
-    // q9 = {tmp[4], tmp[12]} * kC2 >> 16
-    "vqdmulh.s16     q8, q2, d0[0]               \n"
-    "vqdmulh.s16     q9, q2, d0[1]               \n"
+      // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+      // q9 = {tmp[4], tmp[12]} * kC2 >> 16
+      "vqdmulh.s16     q8, q2, d0[0]               \n"
+      "vqdmulh.s16     q9, q2, d0[1]               \n"

-    // d22 = a = tmp[0] + tmp[8]
-    // d23 = b = tmp[0] - tmp[8]
-    "vqadd.s16       d22, d2, d3                 \n"
-    "vqsub.s16       d23, d2, d3                 \n"
+      // d22 = a = tmp[0] + tmp[8]
+      // d23 = b = tmp[0] - tmp[8]
+      "vqadd.s16       d22, d2, d3                 \n"
+      "vqsub.s16       d23, d2, d3                 \n"

-    "vshr.s16        q8, q8, #1                  \n"
-    "vqadd.s16       q8, q2, q8                  \n"
+      "vshr.s16        q8, q8, #1                  \n"
+      "vqadd.s16       q8, q2, q8                  \n"

-    // d20 = c = in[4]*kC2 - in[12]*kC1
-    // d21 = d = in[4]*kC1 + in[12]*kC2
-    "vqsub.s16       d20, d18, d17               \n"
-    "vqadd.s16       d21, d19, d16               \n"
+      // d20 = c = in[4]*kC2 - in[12]*kC1
+      // d21 = d = in[4]*kC1 + in[12]*kC2
+      "vqsub.s16       d20, d18, d17               \n"
+      "vqadd.s16       d21, d19, d16               \n"

-    // d2 = tmp[0] = a + d
-    // d3 = tmp[1] = b + c
-    // d4 = tmp[2] = b - c
-    // d5 = tmp[3] = a - d
-    "vqadd.s16       d2, d22, d21                \n"
-    "vqadd.s16       d3, d23, d20                \n"
-    "vqsub.s16       d4, d23, d20                \n"
-    "vqsub.s16       d5, d22, d21                \n"
+      // d2 = tmp[0] = a + d
+      // d3 = tmp[1] = b + c
+      // d4 = tmp[2] = b - c
+      // d5 = tmp[3] = a - d
+      "vqadd.s16       d2, d22, d21                \n"
+      "vqadd.s16       d3, d23, d20                \n"
+      "vqsub.s16       d4, d23, d20                \n"
+      "vqsub.s16       d5, d22, d21                \n"

-    "vld1.32         d6[0], [%[ref]], %[kBPS]    \n"
-    "vld1.32         d6[1], [%[ref]], %[kBPS]    \n"
-    "vld1.32         d7[0], [%[ref]], %[kBPS]    \n"
-    "vld1.32         d7[1], [%[ref]], %[kBPS]    \n"
+      "vld1.32         d6[0], [%[ref]], %[kBPS]    \n"
+      "vld1.32         d6[1], [%[ref]], %[kBPS]    \n"
+      "vld1.32         d7[0], [%[ref]], %[kBPS]    \n"
+      "vld1.32         d7[1], [%[ref]], %[kBPS]    \n"

-    "sub         %[ref], %[ref], %[kBPS], lsl #2 \n"
+      "sub         %[ref], %[ref], %[kBPS], lsl #2 \n"

-    // (val) + 4 >> 3
-    "vrshr.s16       d2, d2, #3                  \n"
-    "vrshr.s16       d3, d3, #3                  \n"
-    "vrshr.s16       d4, d4, #3                  \n"
-    "vrshr.s16       d5, d5, #3                  \n"
+      // (val) + 4 >> 3
+      "vrshr.s16       d2, d2, #3                  \n"
+      "vrshr.s16       d3, d3, #3                  \n"
+      "vrshr.s16       d4, d4, #3                  \n"
+      "vrshr.s16       d5, d5, #3                  \n"

-    "vzip.16         q1, q2                      \n"
-    "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"
+      "vzip.16         q1, q2                      \n"

-    // Must accumulate before saturating
-    "vmovl.u8        q8, d6                      \n"
-    "vmovl.u8        q9, d7                      \n"
+      // Must accumulate before saturating
+      "vmovl.u8        q8, d6                      \n"
+      "vmovl.u8        q9, d7                      \n"

-    "vqadd.s16       q1, q1, q8                  \n"
-    "vqadd.s16       q2, q2, q9                  \n"
+      "vqadd.s16       q1, q1, q8                  \n"
+      "vqadd.s16       q2, q2, q9                  \n"

-    "vqmovun.s16     d0, q1                      \n"
-    "vqmovun.s16     d1, q2                      \n"
+      "vqmovun.s16     d0, q1                      \n"
+      "vqmovun.s16     d1, q2                      \n"

-    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
-    "vst1.32         d1[1], [%[dst]]             \n"
+      "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+      "vst1.32         d1[1], [%[dst]]             \n"

-    : [in] "+r"(in), [dst] "+r"(dst)               // modified registers
-    : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref)  // constants
-    : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11"  // clobbered
+      : [in] "+r"(in), [dst] "+r"(dst)  // modified registers
+      : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref)  // constants
+      : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11"  // clobbered
  );
 }

-#endif    // WEBP_USE_INTRINSICS
+#endif  // WEBP_USE_INTRINSICS

 static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
                            const int16_t* WEBP_RESTRICT in,
@@ -272,24 +272,19 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) {

 #if defined(WEBP_USE_INTRINSICS)

-static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
-                                              const int16x4_t B,
-                                              const int16x4_t C,
-                                              const int16x4_t D,
-                                              int16x8_t* const out01,
-                                              int16x8_t* const out32) {
+static WEBP_INLINE void Transpose4x4_S16_NEON(
+    const int16x4_t A, const int16x4_t B, const int16x4_t C, const int16x4_t D,
+    int16x8_t* const out01, int16x8_t* const out32) {
  const int16x4x2_t AB = vtrn_s16(A, B);
  const int16x4x2_t CD = vtrn_s16(C, D);
  const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
                                     vreinterpret_s32_s16(CD.val[0]));
  const int32x2x2_t tmp13 = vtrn_s32(vreinterpret_s32_s16(AB.val[1]),
                                     vreinterpret_s32_s16(CD.val[1]));
-  *out01 = vreinterpretq_s16_s64(
-      vcombine_s64(vreinterpret_s64_s32(tmp02.val[0]),
-                   vreinterpret_s64_s32(tmp13.val[0])));
-  *out32 = vreinterpretq_s16_s64(
-      vcombine_s64(vreinterpret_s64_s32(tmp13.val[1]),
-                   vreinterpret_s64_s32(tmp02.val[1])));
+  *out01 = vreinterpretq_s16_s64(vcombine_s64(
+      vreinterpret_s64_s32(tmp02.val[0]), vreinterpret_s64_s32(tmp13.val[0])));
+  *out32 = vreinterpretq_s16_s64(vcombine_s64(
+      vreinterpret_s64_s32(tmp13.val[1]), vreinterpret_s64_s32(tmp02.val[1])));
 }

 static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
@@ -300,7 +295,7 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
 static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
                            const uint8_t* WEBP_RESTRICT ref,
                            int16_t* WEBP_RESTRICT out) {
-  int16x8_t d0d1, d3d2;   // working 4x4 int16 variables
+  int16x8_t d0d1, d3d2;  // working 4x4 int16 variables
  {
    const uint8x16_t S0 = Load4x4_NEON(src);
    const uint8x16_t R0 = Load4x4_NEON(ref);
@@ -312,16 +307,16 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
    const int16x4_t D3 = vget_high_s16(D2D3);
    Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
  }
-  {    // 1rst pass
+  {  // 1rst pass
    const int32x4_t kCst937 = vdupq_n_s32(937);
    const int32x4_t kCst1812 = vdupq_n_s32(1812);
-    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
-    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);  // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);  // d0-d3 | d1-d2   (=a3|a2)
    const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3);
-    const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2),
-                                    vget_high_s16(a0a1_2));
-    const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2),
-                                    vget_high_s16(a0a1_2));
+    const int16x4_t tmp0 =
+        vadd_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
+    const int16x4_t tmp2 =
+        vsub_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
@@ -330,12 +325,12 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
    const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
    Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
  }
-  {    // 2nd pass
+  {  // 2nd pass
    // the (1<<16) addition is for the replacement: a3!=0  <-> 1-(a3==0)
    const int32x4_t kCst12000 = vdupq_n_s32(12000 + (1 << 16));
    const int32x4_t kCst51000 = vdupq_n_s32(51000);
-    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
-    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);  // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);  // d0-d3 | d1-d2   (=a3|a2)
    const int16x4_t a0_k7 = vadd_s16(vget_low_s16(a0a1), vdup_n_s16(7));
    const int16x4_t out0 = vshr_n_s16(vadd_s16(a0_k7, vget_high_s16(a0a1)), 4);
    const int16x4_t out2 = vshr_n_s16(vsub_s16(a0_k7, vget_high_s16(a0a1)), 4);
@@ -348,9 +343,9 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
    const int16x4_t a3_eq_0 =
        vreinterpret_s16_u16(vceq_s16(vget_low_s16(a3a2), vdup_n_s16(0)));
    const int16x4_t out1 = vadd_s16(tmp1, a3_eq_0);
-    vst1_s16(out +  0, out0);
-    vst1_s16(out +  4, out1);
-    vst1_s16(out +  8, out2);
+    vst1_s16(out + 0, out0);
+    vst1_s16(out + 4, out1);
+    vst1_s16(out + 8, out2);
    vst1_s16(out + 12, out3);
  }
 }
@@ -358,15 +353,11 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
 #else

 // adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
-static const int16_t kCoeff16[] = {
-  5352,  5352,  5352, 5352, 2217,  2217,  2217, 2217
-};
-static const int32_t kCoeff32[] = {
-   1812,  1812,  1812,  1812,
-    937,   937,   937,   937,
-  12000, 12000, 12000, 12000,
-  51000, 51000, 51000, 51000
-};
+static const int16_t kCoeff16[] = {5352, 5352, 5352, 5352,
+                                   2217, 2217, 2217, 2217};
+static const int32_t kCoeff32[] = {1812,  1812,  1812,  1812,  937,   937,
+                                   937,   937,   12000, 12000, 12000, 12000,
+                                   51000, 51000, 51000, 51000};

 static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
                            const uint8_t* WEBP_RESTRICT ref,
@@ -377,117 +368,122 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
  const int16_t* coeff16 = kCoeff16;
  const int32_t* coeff32 = kCoeff32;

-  __asm__ volatile (
-    // load src into q4, q5 in high half
-    "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
-    "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
-    "vld1.8 {d9},  [%[src_ptr]], %[kBPS]      \n"
-    "vld1.8 {d11}, [%[src_ptr]]               \n"
+  __asm__ volatile(
+      // load src into q4, q5 in high half
+      "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
+      "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
+      "vld1.8 {d9},  [%[src_ptr]], %[kBPS]      \n"
+      "vld1.8 {d11}, [%[src_ptr]]               \n"

-    // load ref into q6, q7 in high half
-    "vld1.8 {d12}, [%[ref_ptr]], %[kBPS]      \n"
-    "vld1.8 {d14}, [%[ref_ptr]], %[kBPS]      \n"
-    "vld1.8 {d13}, [%[ref_ptr]], %[kBPS]      \n"
-    "vld1.8 {d15}, [%[ref_ptr]]               \n"
+      // load ref into q6, q7 in high half
+      "vld1.8 {d12}, [%[ref_ptr]], %[kBPS]      \n"
+      "vld1.8 {d14}, [%[ref_ptr]], %[kBPS]      \n"
+      "vld1.8 {d13}, [%[ref_ptr]], %[kBPS]      \n"
+      "vld1.8 {d15}, [%[ref_ptr]]               \n"

-    // Pack the high values in to q4 and q6
-    "vtrn.32     q4, q5                       \n"
-    "vtrn.32     q6, q7                       \n"
+      // Pack the high values in to q4 and q6
+      "vtrn.32     q4, q5                       \n"
+      "vtrn.32     q6, q7                       \n"

-    // d[0-3] = src - ref
-    "vsubl.u8    q0, d8, d12                  \n"
-    "vsubl.u8    q1, d9, d13                  \n"
+      // d[0-3] = src - ref
+      "vsubl.u8    q0, d8, d12                  \n"
+      "vsubl.u8    q1, d9, d13                  \n"

-    // load coeff16 into q8(d16=5352, d17=2217)
-    "vld1.16     {q8}, [%[coeff16]]           \n"
+      // load coeff16 into q8(d16=5352, d17=2217)
+      "vld1.16     {q8}, [%[coeff16]]           \n"

-    // load coeff32 high half into q9 = 1812, q10 = 937
-    "vld1.32     {q9, q10}, [%[coeff32]]!     \n"
+      // load coeff32 high half into q9 = 1812, q10 = 937
+      "vld1.32     {q9, q10}, [%[coeff32]]!     \n"

-    // load coeff32 low half into q11=12000, q12=51000
-    "vld1.32     {q11,q12}, [%[coeff32]]      \n"
+      // load coeff32 low half into q11=12000, q12=51000
+      "vld1.32     {q11,q12}, [%[coeff32]]      \n"

-    // part 1
-    // Transpose. Register dN is the same as dN in C
-    "vtrn.32         d0, d2                   \n"
-    "vtrn.32         d1, d3                   \n"
-    "vtrn.16         d0, d1                   \n"
-    "vtrn.16         d2, d3                   \n"
+      // part 1
+      // Transpose. Register dN is the same as dN in C
+      "vtrn.32         d0, d2                   \n"
+      "vtrn.32         d1, d3                   \n"
+      "vtrn.16         d0, d1                   \n"
+      "vtrn.16         d2, d3                   \n"

-    "vadd.s16        d4, d0, d3               \n" // a0 = d0 + d3
-    "vadd.s16        d5, d1, d2               \n" // a1 = d1 + d2
-    "vsub.s16        d6, d1, d2               \n" // a2 = d1 - d2
-    "vsub.s16        d7, d0, d3               \n" // a3 = d0 - d3
+      "vadd.s16        d4, d0, d3               \n"  // a0 = d0 + d3
+      "vadd.s16        d5, d1, d2               \n"  // a1 = d1 + d2
+      "vsub.s16        d6, d1, d2               \n"  // a2 = d1 - d2
+      "vsub.s16        d7, d0, d3               \n"  // a3 = d0 - d3

-    "vadd.s16        d0, d4, d5               \n" // a0 + a1
-    "vshl.s16        d0, d0, #3               \n" // temp[0+i*4] = (a0+a1) << 3
-    "vsub.s16        d2, d4, d5               \n" // a0 - a1
-    "vshl.s16        d2, d2, #3               \n" // (temp[2+i*4] = (a0-a1) << 3
+      "vadd.s16        d0, d4, d5               \n"  // a0 + a1
+      "vshl.s16        d0, d0, #3               \n"  // temp[0+i*4] = (a0+a1) <<
+                                                     // 3
+      "vsub.s16        d2, d4, d5               \n"  // a0 - a1
+      "vshl.s16        d2, d2, #3               \n"  // (temp[2+i*4] = (a0-a1)
+                                                     // << 3

-    "vmlal.s16       q9, d7, d16              \n" // a3*5352 + 1812
-    "vmlal.s16       q10, d7, d17             \n" // a3*2217 + 937
-    "vmlal.s16       q9, d6, d17              \n" // a2*2217 + a3*5352 + 1812
-    "vmlsl.s16       q10, d6, d16             \n" // a3*2217 + 937 - a2*5352
+      "vmlal.s16       q9, d7, d16              \n"  // a3*5352 + 1812
+      "vmlal.s16       q10, d7, d17             \n"  // a3*2217 + 937
+      "vmlal.s16       q9, d6, d17              \n"  // a2*2217 + a3*5352 + 1812
+      "vmlsl.s16       q10, d6, d16             \n"  // a3*2217 + 937 - a2*5352

-    // temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
-    // temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
-    "vshrn.s32       d1, q9, #9               \n"
-    "vshrn.s32       d3, q10, #9              \n"
+      // temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
+      // temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
+      "vshrn.s32       d1, q9, #9               \n"
+      "vshrn.s32       d3, q10, #9              \n"

-    // part 2
-    // transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
-    "vtrn.32         d0, d2                   \n"
-    "vtrn.32         d1, d3                   \n"
-    "vtrn.16         d0, d1                   \n"
-    "vtrn.16         d2, d3                   \n"
+      // part 2
+      // transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
+      "vtrn.32         d0, d2                   \n"
+      "vtrn.32         d1, d3                   \n"
+      "vtrn.16         d0, d1                   \n"
+      "vtrn.16         d2, d3                   \n"

-    "vmov.s16        d26, #7                  \n"
+      "vmov.s16        d26, #7                  \n"

-    "vadd.s16        d4, d0, d3               \n" // a1 = ip[0] + ip[12]
-    "vadd.s16        d5, d1, d2               \n" // b1 = ip[4] + ip[8]
-    "vsub.s16        d6, d1, d2               \n" // c1 = ip[4] - ip[8]
-    "vadd.s16        d4, d4, d26              \n" // a1 + 7
-    "vsub.s16        d7, d0, d3               \n" // d1 = ip[0] - ip[12]
+      "vadd.s16        d4, d0, d3               \n"  // a1 = ip[0] + ip[12]
+      "vadd.s16        d5, d1, d2               \n"  // b1 = ip[4] + ip[8]
+      "vsub.s16        d6, d1, d2               \n"  // c1 = ip[4] - ip[8]
+      "vadd.s16        d4, d4, d26              \n"  // a1 + 7
+      "vsub.s16        d7, d0, d3               \n"  // d1 = ip[0] - ip[12]

-    "vadd.s16        d0, d4, d5               \n" // op[0] = a1 + b1 + 7
-    "vsub.s16        d2, d4, d5               \n" // op[8] = a1 - b1 + 7
+      "vadd.s16        d0, d4, d5               \n"  // op[0] = a1 + b1 + 7
+      "vsub.s16        d2, d4, d5               \n"  // op[8] = a1 - b1 + 7

-    "vmlal.s16       q11, d7, d16             \n" // d1*5352 + 12000
-    "vmlal.s16       q12, d7, d17             \n" // d1*2217 + 51000
+      "vmlal.s16       q11, d7, d16             \n"  // d1*5352 + 12000
+      "vmlal.s16       q12, d7, d17             \n"  // d1*2217 + 51000

-    "vceq.s16        d4, d7, #0               \n"
+      "vceq.s16        d4, d7, #0               \n"

-    "vshr.s16        d0, d0, #4               \n"
-    "vshr.s16        d2, d2, #4               \n"
+      "vshr.s16        d0, d0, #4               \n"
+      "vshr.s16        d2, d2, #4               \n"

-    "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
-    "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
+      "vmlal.s16       q11, d6, d17             \n"  // c1*2217 + d1*5352 +
+                                                     // 12000
+      "vmlsl.s16       q12, d6, d16             \n"  // d1*2217 - c1*5352 +
+                                                     // 51000

-    "vmvn            d4, d4                   \n" // !(d1 == 0)
-    // op[4] = (c1*2217 + d1*5352 + 12000)>>16
-    "vshrn.s32       d1, q11, #16             \n"
-    // op[4] += (d1!=0)
-    "vsub.s16        d1, d1, d4               \n"
-    // op[12]= (d1*2217 - c1*5352 + 51000)>>16
-    "vshrn.s32       d3, q12, #16             \n"
+      "vmvn            d4, d4                   \n"  // !(d1 == 0)
+      // op[4] = (c1*2217 + d1*5352 + 12000)>>16
+      "vshrn.s32       d1, q11, #16             \n"
+      // op[4] += (d1!=0)
+      "vsub.s16        d1, d1, d4               \n"
+      // op[12]= (d1*2217 - c1*5352 + 51000)>>16
+      "vshrn.s32       d3, q12, #16             \n"

-    // set result to out array
-    "vst1.16         {q0, q1}, [%[out]]   \n"
-    : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
-      [coeff32] "+r"(coeff32)          // modified registers
-    : [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
-      [out] "r"(out)                   // constants
-    : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
-      "q10", "q11", "q12", "q13"       // clobbered
+      // set result to out array
+      "vst1.16         {q0, q1}, [%[out]]   \n"
+      : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
+        [coeff32] "+r"(coeff32)  // modified registers
+      : [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
+        [out] "r"(out)  // constants
+      : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+        "q10", "q11", "q12", "q13"  // clobbered
  );
 }

 #endif

-#define LOAD_LANE_16b(VALUE, LANE) do {             \
-  (VALUE) = vld1_lane_s16(src, (VALUE), (LANE));    \
-  src += stride;                                    \
-} while (0)
+#define LOAD_LANE_16b(VALUE, LANE)                 \
+  do {                                             \
+    (VALUE) = vld1_lane_s16(src, (VALUE), (LANE)); \
+    src += stride;                                 \
+  } while (0)

 static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
                               int16_t* WEBP_RESTRICT out) {
@@ -546,9 +542,9 @@ static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
    const int16x4_t out2 = vmovn_s32(b2);
    const int16x4_t out3 = vmovn_s32(b3);

-    vst1_s16(out +  0, out0);
-    vst1_s16(out +  4, out1);
-    vst1_s16(out +  8, out2);
+    vst1_s16(out + 0, out0);
+    vst1_s16(out + 4, out1);
+    vst1_s16(out + 8, out2);
    vst1_s16(out + 12, out3);
  }
 }
@@ -586,8 +582,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
  return q4_in;
 }

-static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
-    const int16x8x4_t q4_in) {
+static WEBP_INLINE int16x8x4_t
+DistoHorizontalPass_NEON(const int16x8x4_t q4_in) {
  // {a0, a1} = {in[0] + in[2], in[1] + in[3]}
  // {a3, a2} = {in[0] - in[2], in[1] - in[3]}
  const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
@@ -599,26 +595,24 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
  // tmp[1] = a3 + a2
  // tmp[2] = a3 - a2
  // tmp[3] = a0 - a1
-  INIT_VECTOR4(q4_out,
-               vabsq_s16(vaddq_s16(q_a0, q_a1)),
-               vabsq_s16(vaddq_s16(q_a3, q_a2)),
-               vabdq_s16(q_a3, q_a2), vabdq_s16(q_a0, q_a1));
+  INIT_VECTOR4(q4_out, vabsq_s16(vaddq_s16(q_a0, q_a1)),
+               vabsq_s16(vaddq_s16(q_a3, q_a2)), vabdq_s16(q_a3, q_a2),
+               vabdq_s16(q_a0, q_a1));
  return q4_out;
 }

 static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
-  const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
-                                                        q4_in.val[2]));
-  const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
-                                                        q4_in.val[3]));
-  const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1],
-                                                        q4_in.val[3]));
-  const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0],
-                                                        q4_in.val[2]));
+  const int16x8_t q_a0 =
+      vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0], q4_in.val[2]));
+  const int16x8_t q_a1 =
+      vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1], q4_in.val[3]));
+  const int16x8_t q_a2 =
+      vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1], q4_in.val[3]));
+  const int16x8_t q_a3 =
+      vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0], q4_in.val[2]));
  int16x8x4_t q4_out;

-  INIT_VECTOR4(q4_out,
-               vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
+  INIT_VECTOR4(q4_out, vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
               vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
  return q4_out;
 }
@@ -627,8 +621,7 @@ static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
  const uint16x8_t q_w07 = vld1q_u16(&w[0]);
  const uint16x8_t q_w8f = vld1q_u16(&w[8]);
  int16x4x4_t d4_w;
-  INIT_VECTOR4(d4_w,
-               vget_low_s16(vreinterpretq_s16_u16(q_w07)),
+  INIT_VECTOR4(d4_w, vget_low_s16(vreinterpretq_s16_u16(q_w07)),
               vget_high_s16(vreinterpretq_s16_u16(q_w07)),
               vget_low_s16(vreinterpretq_s16_u16(q_w8f)),
               vget_high_s16(vreinterpretq_s16_u16(q_w8f)));
@@ -660,7 +653,7 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
 }

 #define LOAD_LANE_32b(src, VALUE, LANE) \
-    (VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
+  (VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))

 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
@@ -683,8 +676,7 @@ static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
  LOAD_LANE_32b(b + 1 * BPS, d_in_ab_4567, 1);
  LOAD_LANE_32b(b + 2 * BPS, d_in_ab_89ab, 1);
  LOAD_LANE_32b(b + 3 * BPS, d_in_ab_cdef, 1);
-  INIT_VECTOR4(d4_in,
-               vreinterpret_u8_u32(d_in_ab_0123),
+  INIT_VECTOR4(d4_in, vreinterpret_u8_u32(d_in_ab_0123),
               vreinterpret_u8_u32(d_in_ab_4567),
               vreinterpret_u8_u32(d_in_ab_89ab),
               vreinterpret_u8_u32(d_in_ab_cdef));
@@ -729,7 +721,7 @@ static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
                                  VP8Histogram* WEBP_RESTRICT const histo) {
  const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
@@ -762,10 +754,10 @@ static WEBP_INLINE void AccumulateSSE16_NEON(
  const uint8x16_t a0 = vld1q_u8(a);
  const uint8x16_t b0 = vld1q_u8(b);
  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
-                                    vget_low_u8(abs_diff));
-  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
-                                    vget_high_u8(abs_diff));
+  const uint16x8_t prod1 =
+      vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+  const uint16x8_t prod2 =
+      vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
  /* pair-wise adds and widen */
  const uint32x4_t sum1 = vpaddlq_u16(prod1);
  const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -823,10 +815,10 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
  const uint8x16_t a0 = Load4x4_NEON(a);
  const uint8x16_t b0 = Load4x4_NEON(b);
  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
-                                    vget_low_u8(abs_diff));
-  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
-                                    vget_high_u8(abs_diff));
+  const uint16x8_t prod1 =
+      vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+  const uint16x8_t prod2 =
+      vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
  /* pair-wise adds and widen */
  const uint32x4_t sum1 = vpaddlq_u16(prod1);
  const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -854,24 +846,22 @@ static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
  const uint32x4_t m0 = vmull_u16(vget_low_u16(c), vget_low_u16(iq));
  const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
  const uint32x4_t m2 = vhaddq_u32(m0, bias0);
-  const uint32x4_t m3 = vhaddq_u32(m1, bias1);     // (coeff * iQ + bias) >> 1
-  const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16),
-                                     vshrn_n_u32(m3, 16));   // QFIX=17 = 16+1
+  const uint32x4_t m3 = vhaddq_u32(m1, bias1);  // (coeff * iQ + bias) >> 1
+  const uint16x8_t c0 =
+      vcombine_u16(vshrn_n_u32(m2, 16), vshrn_n_u32(m3, 16));  // QFIX=17 = 16+1
  const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL));
  const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
-  const int16x8_t c3 = vsubq_s16(c2, sign);                  // restore sign
+  const int16x8_t c3 = vsubq_s16(c2, sign);  // restore sign
  const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q));
  vst1q_s16(in + offset, c4);
  assert(QFIX == 17);  // this function can't work as is if QFIX != 16+1
  return c3;
 }

-static const uint8_t kShuffles[4][8] = {
-  { 0,   1,  2,  3,  8,  9, 16, 17 },
-  { 10, 11,  4,  5,  6,  7, 12, 13 },
-  { 18, 19, 24, 25, 26, 27, 20, 21 },
-  { 14, 15, 22, 23, 28, 29, 30, 31 }
-};
+static const uint8_t kShuffles[4][8] = {{0, 1, 2, 3, 8, 9, 16, 17},
+                                        {10, 11, 4, 5, 6, 7, 12, 13},
+                                        {18, 19, 24, 25, 26, 27, 20, 21},
+                                        {14, 15, 22, 23, 28, 29, 30, 31}};

 static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
                              const VP8Matrix* WEBP_RESTRICT const mtx) {
@@ -880,37 +870,34 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
  uint8x8x4_t shuffles;
  // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
  // non-standard versions there.
-#if defined(__APPLE__) && WEBP_AARCH64 && \
-    defined(__apple_build_version__) && (__apple_build_version__< 6020037)
+#if defined(__APPLE__) && WEBP_AARCH64 && defined(__apple_build_version__) && \
+    (__apple_build_version__ < 6020037)
  uint8x16x2_t all_out;
  INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
-  INIT_VECTOR4(shuffles,
-               vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
+  INIT_VECTOR4(shuffles, vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[2])),
               vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
 #else
  uint8x8x4_t all_out;
-  INIT_VECTOR4(all_out,
-               vreinterpret_u8_s16(vget_low_s16(out0)),
+  INIT_VECTOR4(all_out, vreinterpret_u8_s16(vget_low_s16(out0)),
               vreinterpret_u8_s16(vget_high_s16(out0)),
               vreinterpret_u8_s16(vget_low_s16(out1)),
               vreinterpret_u8_s16(vget_high_s16(out1)));
-  INIT_VECTOR4(shuffles,
-               vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
+  INIT_VECTOR4(shuffles, vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[2])),
               vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
 #endif
  // Zigzag reordering
-  vst1_u8((uint8_t*)(out +  0), shuffles.val[0]);
-  vst1_u8((uint8_t*)(out +  4), shuffles.val[1]);
-  vst1_u8((uint8_t*)(out +  8), shuffles.val[2]);
+  vst1_u8((uint8_t*)(out + 0), shuffles.val[0]);
+  vst1_u8((uint8_t*)(out + 4), shuffles.val[1]);
+  vst1_u8((uint8_t*)(out + 8), shuffles.val[2]);
  vst1_u8((uint8_t*)(out + 12), shuffles.val[3]);
  // test zeros
-  if (*(uint64_t*)(out +  0) != 0) return 1;
-  if (*(uint64_t*)(out +  4) != 0) return 1;
-  if (*(uint64_t*)(out +  8) != 0) return 1;
+  if (*(uint64_t*)(out + 0) != 0) return 1;
+  if (*(uint64_t*)(out + 4) != 0) return 1;
+  if (*(uint64_t*)(out + 8) != 0) return 1;
  if (*(uint64_t*)(out + 12) != 0) return 1;
  return 0;
 }
@@ -918,31 +905,31 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
                                const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  nz  = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }

-#endif   // !WORK_AROUND_GCC
+#endif  // !WORK_AROUND_GCC

 #if WEBP_AARCH64

 #if BPS == 32
-#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane)                              \
-  do {                                                                         \
-    uint8x16_t r;                                                              \
-    r = vqtbl2q_u8(qcombined, tbl);                                            \
-    r = vreinterpretq_u8_u32(                                                  \
-        vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane),          \
-                       vreinterpretq_u32_u8(r), 1));                           \
-    vst1q_u8(dst, r);                                                          \
+#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane)                     \
+  do {                                                                \
+    uint8x16_t r;                                                     \
+    r = vqtbl2q_u8(qcombined, tbl);                                   \
+    r = vreinterpretq_u8_u32(                                         \
+        vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane), \
+                       vreinterpretq_u32_u8(r), 1));                  \
+    vst1q_u8(dst, r);                                                 \
  } while (0)

-#define RD4_VR4_LD4_VL4_NEON(dst, tbl)                                         \
-  do {                                                                         \
-    uint8x16_t r;                                                              \
-    r = vqtbl2q_u8(qcombined, tbl);                                            \
-    vst1q_u8(dst, r);                                                          \
+#define RD4_VR4_LD4_VL4_NEON(dst, tbl) \
+  do {                                 \
+    uint8x16_t r;                      \
+    r = vqtbl2q_u8(qcombined, tbl);    \
+    vst1q_u8(dst, r);                  \
  } while (0)

 static WEBP_INLINE uint8x8x2_t Vld1U8x2(const uint8_t* ptr) {
@@ -960,8 +947,7 @@ static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
  return vld1q_u8_x4(ptr);
 #else
  uint8x16x4_t res;
-  INIT_VECTOR4(res,
-               vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
+  INIT_VECTOR4(res, vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
               vld1q_u8(ptr + 2 * 16), vld1q_u8(ptr + 3 * 16));
  return res;
 #endif
@@ -973,25 +959,22 @@ static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
  //     L   K   J   I   X   A   B   C   D   E   F   G   H
  //    -5  -4  -3  -2  -1   0   1   2   3   4   5   6   7
  static const uint8_t kLookupTbl1[64] = {
-    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 12, 12,
-    3,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  0,  0,  0,  0,
-    4, 20, 21, 22,  3, 18,  2, 17,  3, 19,  4, 20,  2, 17,  1, 16,
-    2, 18,  3, 19,  1, 16, 31, 31,  1, 17,  2, 18, 31, 31, 31, 31
-  };
+      0, 0,  1,  2,  3, 4,  5,  6,  7, 8,  9, 10, 11, 12, 12, 12,
+      3, 3,  3,  3,  2, 2,  2,  2,  1, 1,  1, 1,  0,  0,  0,  0,
+      4, 20, 21, 22, 3, 18, 2,  17, 3, 19, 4, 20, 2,  17, 1,  16,
+      2, 18, 3,  19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31};

  static const uint8_t kLookupTbl2[64] = {
-    20, 21, 22, 23,  5,  6,  7,  8, 22, 23, 24, 25,  6,  7,  8,  9,
-    19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
-    18, 19, 20, 21, 19,  5,  6,  7, 24, 25, 26, 27,  7,  8,  9, 26,
-    17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
-  };
+      20, 21, 22, 23, 5,  6,  7,  8,  22, 23, 24, 25, 6,  7,  8,  9,
+      19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
+      18, 19, 20, 21, 19, 5,  6,  7,  24, 25, 26, 27, 7,  8,  9,  26,
+      17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27};

  static const uint8_t kLookupTbl3[64] = {
-    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 19, 19, 19, 19,
-    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 18, 18, 18, 18,
-    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 17, 17, 17, 17,
-    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 16, 16, 16, 16
-  };
+      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
+      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
+      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
+      30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16};

  const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
  const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
@@ -1210,7 +1193,7 @@ static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
  TrueMotion_NEON(I16TM16 + dst, left, top);
 }

-#endif // WEBP_AARCH64
+#endif  // WEBP_AARCH64

 //------------------------------------------------------------------------------
 // Entry point
--- a/src/dsp/enc_sse2.c
+++ b/src/dsp/enc_sse2.c
@@ -14,9 +14,8 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
-#include <emmintrin.h>
-
 #include <assert.h>
+#include <emmintrin.h>
 #include <stdlib.h>  // for abs()
 #include <string.h>

@@ -50,10 +49,10 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
  //   variable and the multiplication of that variable by the associated
  //   constant:
  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
-  const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
-                                     20091, 20091, 20091, 20091);
-  const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
-                                     -30068, -30068, -30068, -30068);
+  const __m128i k1k2 =
+      _mm_set_epi16(-30068, -30068, -30068, -30068, 20091, 20091, 20091, 20091);
+  const __m128i k2k1 =
+      _mm_set_epi16(20091, 20091, 20091, 20091, -30068, -30068, -30068, -30068);
  const __m128i zero = _mm_setzero_si128();
  const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
  __m128i T01, T23;
@@ -172,7 +171,7 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
    // Unsigned saturate to 8b.
    ref0123 = _mm_packus_epi16(ref01, ref23);

-    _mm_storeu_si128((__m128i *)buf, ref0123);
+    _mm_storeu_si128((__m128i*)buf, ref0123);

    // Store four bytes/pixels per line.
    WebPInt32ToMem(&dst[0 * BPS], buf[0]);
@@ -259,8 +258,8 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
    // multiplications.
    const __m128i four = _mm_set1_epi16(4);
    const __m128i dc = _mm_add_epi16(T0, four);
-    const __m128i a =  _mm_add_epi16(dc, T2);
-    const __m128i b =  _mm_sub_epi16(dc, T2);
+    const __m128i a = _mm_add_epi16(dc, T2);
+    const __m128i b = _mm_sub_epi16(dc, T2);
    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
@@ -325,8 +324,7 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
 // Does one or two inverse transforms.
 static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
                            const int16_t* WEBP_RESTRICT in,
-                            uint8_t* WEBP_RESTRICT dst,
-                            int do_two) {
+                            uint8_t* WEBP_RESTRICT dst, int do_two) {
  if (do_two) {
    ITransform_Two_SSE2(ref, in, dst);
  } else {
@@ -336,17 +334,16 @@ static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,

 static void FTransformPass1_SSE2(const __m128i* const in01,
                                 const __m128i* const in23,
-                                 __m128i* const out01,
-                                 __m128i* const out32) {
+                                 __m128i* const out01, __m128i* const out32) {
  const __m128i k937 = _mm_set1_epi32(937);
  const __m128i k1812 = _mm_set1_epi32(1812);

  const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
  const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
-  const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
-                                            2217, 5352, 2217, 5352);
-  const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
-                                            -5352, 2217, -5352, 2217);
+  const __m128i k5352_2217p =
+      _mm_set_epi16(2217, 5352, 2217, 5352, 2217, 5352, 2217, 5352);
+  const __m128i k5352_2217m =
+      _mm_set_epi16(-5352, 2217, -5352, 2217, -5352, 2217, -5352, 2217);

  // *in01 = 00 01 10 11 02 03 12 13
  // *in23 = 20 21 30 31 22 23 32 33
@@ -363,19 +360,19 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
  // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
  // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]

-  const __m128i tmp0   = _mm_madd_epi16(a01, k88p);  // [ (a0 + a1) << 3, ... ]
-  const __m128i tmp2   = _mm_madd_epi16(a01, k88m);  // [ (a0 - a1) << 3, ... ]
+  const __m128i tmp0 = _mm_madd_epi16(a01, k88p);  // [ (a0 + a1) << 3, ... ]
+  const __m128i tmp2 = _mm_madd_epi16(a01, k88m);  // [ (a0 - a1) << 3, ... ]
  const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
  const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
  const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
  const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
-  const __m128i tmp1   = _mm_srai_epi32(tmp1_2, 9);
-  const __m128i tmp3   = _mm_srai_epi32(tmp3_2, 9);
-  const __m128i s03    = _mm_packs_epi32(tmp0, tmp2);
-  const __m128i s12    = _mm_packs_epi32(tmp1, tmp3);
-  const __m128i s_lo   = _mm_unpacklo_epi16(s03, s12);   // 0 1 0 1 0 1...
-  const __m128i s_hi   = _mm_unpackhi_epi16(s03, s12);   // 2 3 2 3 2 3
-  const __m128i v23    = _mm_unpackhi_epi32(s_lo, s_hi);
+  const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
+  const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
+  const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
+  const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
+  const __m128i s_lo = _mm_unpacklo_epi16(s03, s12);  // 0 1 0 1 0 1...
+  const __m128i s_hi = _mm_unpackhi_epi16(s03, s12);  // 2 3 2 3 2 3
+  const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
  *out01 = _mm_unpacklo_epi32(s_lo, s_hi);
  *out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));  // 3 2 3 2 3 2..
 }
@@ -385,10 +382,10 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
                                 int16_t* WEBP_RESTRICT out) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i seven = _mm_set1_epi16(7);
-  const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
-                                           5352,  2217, 5352,  2217);
-  const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
-                                           2217, -5352, 2217, -5352);
+  const __m128i k5352_2217 =
+      _mm_set_epi16(5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217);
+  const __m128i k2217_5352 =
+      _mm_set_epi16(2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352);
  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
  const __m128i k51000 = _mm_set1_epi32(51000);

@@ -537,8 +534,8 @@ static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
  const __m128i src3 = _mm_loadl_epi64((__m128i*)&in[3 * 16]);
  const __m128i A01 = _mm_unpacklo_epi16(src0, src1);  // A0 A1 | ...
  const __m128i A23 = _mm_unpacklo_epi16(src2, src3);  // A2 A3 | ...
-  const __m128i B0 = _mm_adds_epi16(A01, A23);    // a0 | a1 | ...
-  const __m128i B1 = _mm_subs_epi16(A01, A23);    // a3 | a2 | ...
+  const __m128i B0 = _mm_adds_epi16(A01, A23);         // a0 | a1 | ...
+  const __m128i B1 = _mm_subs_epi16(A01, A23);         // a3 | a2 | ...
  const __m128i C0 = _mm_unpacklo_epi32(B0, B1);  // a0 | a1 | a3 | a2 | ...
  const __m128i C1 = _mm_unpacklo_epi32(B1, B0);  // a3 | a2 | a0 | a1 | ...
  const __m128i D = _mm_unpacklo_epi64(C0, C1);   // a0 a1 a3 a2 a3 a2 a0 a1
@@ -586,7 +583,7 @@ static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
  const __m128i zero = _mm_setzero_si128();
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;
@@ -600,7 +597,7 @@ static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
      const __m128i d0 = _mm_sub_epi16(zero, out0);
      const __m128i d1 = _mm_sub_epi16(zero, out1);
-      const __m128i abs0 = _mm_max_epi16(out0, d0);   // abs(v), 16b
+      const __m128i abs0 = _mm_max_epi16(out0, d0);  // abs(v), 16b
      const __m128i abs1 = _mm_max_epi16(out1, d1);
      // v = abs(out) >> 3
      const __m128i v0 = _mm_srai_epi16(abs0, 3);
@@ -927,7 +924,7 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -950,8 +947,8 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));

@@ -978,8 +975,8 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
  const uint32_t extra_out =
      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
-  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));

@@ -1000,7 +997,7 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
-  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -1012,14 +1009,13 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
  const int J = top[-3];
  const int K = top[-4];
  const int L = top[-5];
-  DST(0, 0) =             AVG2(I, J);
+  DST(0, 0) = AVG2(I, J);
  DST(2, 0) = DST(0, 1) = AVG2(J, K);
  DST(2, 1) = DST(0, 2) = AVG2(K, L);
-  DST(1, 0) =             AVG3(I, J, K);
+  DST(1, 0) = AVG3(I, J, K);
  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
-  DST(3, 2) = DST(2, 2) =
-  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+  DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }

 static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
@@ -1036,14 +1032,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
  DST(0, 0) = DST(2, 1) = AVG2(I, X);
  DST(0, 1) = DST(2, 2) = AVG2(J, I);
  DST(0, 2) = DST(2, 3) = AVG2(K, J);
-  DST(0, 3)             = AVG2(L, K);
+  DST(0, 3) = AVG2(L, K);

-  DST(3, 0)             = AVG3(A, B, C);
-  DST(2, 0)             = AVG3(X, A, B);
+  DST(3, 0) = AVG3(A, B, C);
+  DST(2, 0) = AVG3(X, A, B);
  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
-  DST(1, 3)             = AVG3(L, K, J);
+  DST(1, 3) = AVG3(L, K, J);
 }

 static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
@@ -1244,11 +1240,11 @@ static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
  const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
  const __m128i a2 = _mm_loadu_si128((const __m128i*)&ref[BPS * 2]);
  const __m128i a3 = _mm_loadu_si128((const __m128i*)&ref[BPS * 3]);
-  const __m128i b0 = _mm_srli_epi16(a0, 8);     // hi byte
+  const __m128i b0 = _mm_srli_epi16(a0, 8);  // hi byte
  const __m128i b1 = _mm_srli_epi16(a1, 8);
  const __m128i b2 = _mm_srli_epi16(a2, 8);
  const __m128i b3 = _mm_srli_epi16(a3, 8);
-  const __m128i c0 = _mm_and_si128(a0, mask);   // lo byte
+  const __m128i c0 = _mm_and_si128(a0, mask);  // lo byte
  const __m128i c1 = _mm_and_si128(a1, mask);
  const __m128i c2 = _mm_and_si128(a2, mask);
  const __m128i c3 = _mm_and_si128(a3, mask);
@@ -1357,7 +1353,7 @@ static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
      const __m128i d1 = _mm_sub_epi16(zero, A_b2);
      const __m128i d2 = _mm_sub_epi16(zero, B_b0);
      const __m128i d3 = _mm_sub_epi16(zero, B_b2);
-      A_b0 = _mm_max_epi16(A_b0, d0);   // abs(v), 16b
+      A_b0 = _mm_max_epi16(A_b0, d0);  // abs(v), 16b
      A_b2 = _mm_max_epi16(A_b2, d1);
      B_b0 = _mm_max_epi16(B_b0, d2);
      B_b2 = _mm_max_epi16(B_b2, d3);
@@ -1494,11 +1490,11 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
  // position instead of twelfth) and 8th values.
  {
    __m128i outZ0, outZ8;
-    outZ0 = _mm_shufflehi_epi16(out0,  _MM_SHUFFLE(2, 1, 3, 0));
-    outZ0 = _mm_shuffle_epi32  (outZ0, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ0 = _mm_shufflehi_epi16(out0, _MM_SHUFFLE(2, 1, 3, 0));
+    outZ0 = _mm_shuffle_epi32(outZ0, _MM_SHUFFLE(3, 1, 2, 0));
    outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
-    outZ8 = _mm_shufflelo_epi16(out8,  _MM_SHUFFLE(3, 0, 2, 1));
-    outZ8 = _mm_shuffle_epi32  (outZ8, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ8 = _mm_shufflelo_epi16(out8, _MM_SHUFFLE(3, 0, 2, 1));
+    outZ8 = _mm_shuffle_epi32(outZ8, _MM_SHUFFLE(3, 1, 2, 0));
    outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
    _mm_storeu_si128((__m128i*)&out[0], outZ0);
    _mm_storeu_si128((__m128i*)&out[8], outZ8);
@@ -1529,7 +1525,7 @@ static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
                                const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
  const uint16_t* const sharpen = &mtx->sharpen[0];
-  nz  = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
  nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
  return nz;
 }
--- a/src/dsp/enc_sse41.c
+++ b/src/dsp/enc_sse41.c
@@ -16,7 +16,6 @@
 #if defined(WEBP_USE_SSE41)
 #include <emmintrin.h>
 #include <smmintrin.h>
-
 #include <stdlib.h>  // for abs()

 #include "src/dsp/common_sse2.h"
@@ -33,7 +32,7 @@ static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
                                   VP8Histogram* WEBP_RESTRICT const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
-  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  int distribution[MAX_COEFF_THRESH + 1] = {0};
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;
@@ -198,7 +197,7 @@ static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
 //

 // Generates a pshufb constant for shuffling 16b words.
-#define PSHUFB_CST(A,B,C,D,E,F,G,H) \
+#define PSHUFB_CST(A, B, C, D, E, F, G, H)                         \
  _mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, \
               2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, 2 * (E) + 0, \
               2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
@@ -321,7 +320,7 @@ static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
                                 const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
  const uint16_t* const sharpen = &mtx->sharpen[0];
-  nz  = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
  nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
  return nz;
 }
--- a/src/dsp/filters.c
+++ b/src/dsp/filters.c
@@ -22,14 +22,14 @@
 //------------------------------------------------------------------------------
 // Helpful macro.

-#define DCHECK(in, out)                                                        \
-  do {                                                                         \
-    assert((in) != NULL);                                                      \
-    assert((out) != NULL);                                                     \
-    assert((in) != (out));                                                     \
-    assert(width > 0);                                                         \
-    assert(height > 0);                                                        \
-    assert(stride >= width);                                                   \
+#define DCHECK(in, out)      \
+  do {                       \
+    assert((in) != NULL);    \
+    assert((out) != NULL);   \
+    assert((in) != (out));   \
+    assert(width > 0);       \
+    assert(height > 0);      \
+    assert(stride >= width); \
  } while (0)

 #if !WEBP_NEON_OMIT_C_CODE
@@ -124,8 +124,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
    // leftmost pixel: predict from above.
    PredictLine_C(in, preds - stride, out, 1);
    for (w = 1; w < width; ++w) {
-      const int pred = GradientPredictor_C(preds[w - 1],
-                                           preds[w - stride],
+      const int pred = GradientPredictor_C(preds[w - 1], preds[w - stride],
                                           preds[w - stride - 1]);
      out[w] = (uint8_t)(in[w] - pred);
    }
@@ -141,20 +140,20 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
 //------------------------------------------------------------------------------

 #if !WEBP_NEON_OMIT_C_CODE
-static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
-                               int width, int height, int stride,
+static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  DoHorizontalFilter_C(data, width, height, stride, filtered_data);
 }

-static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
-                             int width, int height, int stride,
+static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
+                             int height, int stride,
                             uint8_t* WEBP_RESTRICT filtered_data) {
  DoVerticalFilter_C(data, width, height, stride, filtered_data);
 }

-static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
-                             int width, int height, int stride,
+static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
+                             int height, int stride,
                             uint8_t* WEBP_RESTRICT filtered_data) {
  DoGradientFilter_C(data, width, height, stride, filtered_data);
 }
@@ -162,8 +161,8 @@ static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,

 //------------------------------------------------------------------------------

-static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
-                           uint8_t* out, int width) {
+static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in, uint8_t* out,
+                           int width) {
  (void)prev;
  if (out != in) memcpy(out, in, width * sizeof(*out));
 }
--- a/src/dsp/filters_mips_dsp_r2.c
+++ b/src/dsp/filters_mips_dsp_r2.c
@@ -16,30 +16,33 @@

 #if defined(WEBP_USE_MIPS_DSP_R2)

-#include "src/dsp/dsp.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

+#include "src/dsp/dsp.h"
+
 //------------------------------------------------------------------------------
 // Helpful macro.

-#define DCHECK(in, out)                                                        \
-  do {                                                                         \
-    assert((in) != NULL);                                                      \
-    assert((out) != NULL);                                                     \
-    assert((in) != (out));                                                     \
-    assert(width > 0);                                                         \
-    assert(height > 0);                                                        \
-    assert(stride >= width);                                                   \
+#define DCHECK(in, out)      \
+  do {                       \
+    assert((in) != NULL);    \
+    assert((out) != NULL);   \
+    assert((in) != (out));   \
+    assert(width > 0);       \
+    assert(height > 0);      \
+    assert(stride >= width); \
  } while (0)

-#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do {                        \
+// clang-format off
+#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE)                             \
+  do {                                                                         \
    const uint8_t* psrc = (uint8_t*)(SRC);                                     \
    uint8_t* pdst = (uint8_t*)(DST);                                           \
    const int ilength = (int)(LENGTH);                                         \
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;                       \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      ".set      push                                   \n\t"                  \
      ".set      noreorder                              \n\t"                  \
      "srl       %[temp0],    %[length],    2           \n\t"                  \
@@ -101,6 +104,7 @@
      : "memory"                                                               \
    );                                                                         \
  } while (0)
+// clang-format on

 static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
                                              uint8_t* WEBP_RESTRICT dst,
@@ -108,13 +112,15 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
  DO_PREDICT_LINE(src, dst, length, 0);
 }

-#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do {         \
+// clang-format off
+#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE)              \
+  do {                                                                         \
    const uint8_t* psrc = (uint8_t*)(SRC);                                     \
    const uint8_t* ppred = (uint8_t*)(PRED);                                   \
    uint8_t* pdst = (uint8_t*)(DST);                                           \
    const int ilength = (int)(LENGTH);                                         \
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;                \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      ".set      push                                   \n\t"                  \
      ".set      noreorder                              \n\t"                  \
      "srl       %[temp0],    %[length],    0x3         \n\t"                  \
@@ -167,9 +173,10 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
    );                                                                         \
  } while (0)

-#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) do {                             \
+#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST)                                  \
+  do {                                                                         \
    int temp1, temp2, temp3;                                                   \
-    __asm__ volatile (                                                         \
+    __asm__ volatile(                                                          \
      "lbu       %[temp1],   0(%[src])               \n\t"                     \
      "lbu       %[temp2],   0(%[pred])              \n\t"                     \
      "subu      %[temp3],   %[temp1],   %[temp2]    \n\t"                     \
@@ -179,18 +186,20 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
      : "memory"                                                               \
    );                                                                         \
  } while (0)
+// clang-format on

 //------------------------------------------------------------------------------
 // Horizontal filter.

-#define FILTER_LINE_BY_LINE do {                                               \
-    for (row = 1; row < height; ++row) {                                       \
-      PREDICT_LINE_ONE_PASS(in, preds - stride, out);                          \
-      DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0);                          \
-      preds += stride;                                                         \
-      in += stride;                                                            \
-      out += stride;                                                           \
-    }                                                                          \
+#define FILTER_LINE_BY_LINE                           \
+  do {                                                \
+    for (row = 1; row < height; ++row) {              \
+      PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
+      DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
+      preds += stride;                                \
+      in += stride;                                   \
+      out += stride;                                  \
+    }                                                 \
  } while (0)

 static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
@@ -221,13 +230,14 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
 //------------------------------------------------------------------------------
 // Vertical filter.

-#define FILTER_LINE_BY_LINE do {                                               \
-    for (row = 1; row < height; ++row) {                                       \
-      DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0);                      \
-      preds += stride;                                                         \
-      in += stride;                                                            \
-      out += stride;                                                           \
-    }                                                                          \
+#define FILTER_LINE_BY_LINE                               \
+  do {                                                    \
+    for (row = 1; row < height; ++row) {                  \
+      DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
+      preds += stride;                                    \
+      in += stride;                                       \
+      out += stride;                                      \
+    }                                                     \
  } while (0)

 static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
@@ -260,31 +270,30 @@ static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,

 static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
  int temp0;
-  __asm__ volatile (
-    "addu             %[temp0],   %[a],       %[b]        \n\t"
-    "subu             %[temp0],   %[temp0],   %[c]        \n\t"
-    "shll_s.w         %[temp0],   %[temp0],   23          \n\t"
-    "precrqu_s.qb.ph  %[temp0],   %[temp0],   $zero       \n\t"
-    "srl              %[temp0],   %[temp0],   24          \n\t"
-    : [temp0]"=&r"(temp0)
-    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
-  );
+  __asm__ volatile(
+      "addu             %[temp0],   %[a],       %[b]        \n\t"
+      "subu             %[temp0],   %[temp0],   %[c]        \n\t"
+      "shll_s.w         %[temp0],   %[temp0],   23          \n\t"
+      "precrqu_s.qb.ph  %[temp0],   %[temp0],   $zero       \n\t"
+      "srl              %[temp0],   %[temp0],   24          \n\t"
+      : [temp0] "=&r"(temp0)
+      : [a] "r"(a), [b] "r"(b), [c] "r"(c));
  return temp0;
 }

-#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do {                             \
-    for (row = 1; row < height; ++row) {                                       \
-      int w;                                                                   \
-      PREDICT_LINE_ONE_PASS(in, PREDS - stride, out);                          \
-      for (w = 1; w < width; ++w) {                                            \
-        const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1],             \
-                                                     PREDS[w - stride],        \
-                                                     PREDS[w - stride - 1]);   \
-        out[w] = in[w] OPERATION pred;                                         \
-      }                                                                        \
-      in += stride;                                                            \
-      out += stride;                                                           \
-    }                                                                          \
+#define FILTER_LINE_BY_LINE(PREDS, OPERATION)                        \
+  do {                                                               \
+    for (row = 1; row < height; ++row) {                             \
+      int w;                                                         \
+      PREDICT_LINE_ONE_PASS(in, PREDS - stride, out);                \
+      for (w = 1; w < width; ++w) {                                  \
+        const int pred = GradientPredictor_MIPSdspR2(                \
+            PREDS[w - 1], PREDS[w - stride], PREDS[w - stride - 1]); \
+        out[w] = in[w] OPERATION pred;                               \
+      }                                                              \
+      in += stride;                                                  \
+      out += stride;                                                 \
+    }                                                                \
  } while (0)

 static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
@@ -316,8 +325,8 @@ static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,

 static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
                                         uint8_t* out, int width) {
- out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
- DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
+  out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
+  DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
 }

 static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
--- a/src/dsp/filters_msa.c
+++ b/src/dsp/filters_msa.c
@@ -15,10 +15,10 @@

 #if defined(WEBP_USE_MSA)

-#include "src/dsp/msa_macro.h"
-
 #include <assert.h>

+#include "src/dsp/msa_macro.h"
+
 static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
                                            const uint8_t* pred,
                                            uint8_t* WEBP_RESTRICT dst,
@@ -57,21 +57,21 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
 //------------------------------------------------------------------------------
 // Helpful macro.

-#define DCHECK(in, out)        \
-  do {                         \
-    assert((in) != NULL);      \
-    assert((out) != NULL);     \
-    assert((in) != (out));     \
-    assert(width > 0);         \
-    assert(height > 0);        \
-    assert(stride >= width);   \
+#define DCHECK(in, out)      \
+  do {                       \
+    assert((in) != NULL);    \
+    assert((out) != NULL);   \
+    assert((in) != (out));   \
+    assert(width > 0);       \
+    assert(height > 0);      \
+    assert(stride >= width); \
  } while (0)

 //------------------------------------------------------------------------------
 // Horizontal filter

-static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
-                                 int width, int height, int stride,
+static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
+                                 int height, int stride,
                                 uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* preds = data;
  const uint8_t* in = data;
@@ -105,7 +105,7 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
                                            uint8_t* WEBP_RESTRICT poutput,
                                            int stride, int size) {
  int w;
-  const v16i8 zero = { 0 };
+  const v16i8 zero = {0};
  while (size >= 16) {
    v16u8 pred0, dst0;
    v8i16 a0, a1, b0, b1, c0, c1;
@@ -133,9 +133,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
  }
 }

-
-static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
-                               int width, int height, int stride,
+static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* in = data;
  const uint8_t* preds = data;
@@ -151,7 +150,7 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
  out += stride;
  // Filter line-by-line.
  while (row < height) {
-    out[0] = in[0] - preds[- stride];
+    out[0] = in[0] - preds[-stride];
    PredictLineGradient(preds + 1, in + 1, out + 1, stride, width - 1);
    ++row;
    preds += stride;
@@ -163,8 +162,8 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
 //------------------------------------------------------------------------------
 // Vertical filter

-static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
-                               int width, int height, int stride,
+static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
+                               int height, int stride,
                               uint8_t* WEBP_RESTRICT filtered_data) {
  const uint8_t* in = data;
  const uint8_t* preds = data;
--- a/Show More
+++ b/Show More