Fix checking for Wasm SIMD support

The defined(WEBP_ENABLE_WASM_SIMD) check in dsp.h meant that cpu.cmake would never successfully compile with Wasm SIMD, since it doesn't set any compile flags. We remove that requirement and instead determine in dec.c whether to use Wasm SIMD intrinsics. Bug: v8:12371 Change-Id: I5d313f6d95301e6153254229a9ea234fec990cd6
Add preliminary support of targeting Wasm
2025-12-24 05:56:27 +01:00 · 2021-11-18 16:47:01 -08:00 · 2021-11-15 15:17:11 -08:00
134 changed files with 3296 additions and 4900 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,7 @@
 *.pc
 .DS_Store
 .deps
-.idea
 .libs
-.vscode
 /aclocal.m4
 /ar-lib
 /autom4te.cache
--- a/.mailmap
+++ b/.mailmap
@@ -8,11 +8,7 @@ Vikas Arora <vikasa@google.com>
 <vikasa@google.com> <vikaas.arora@gmail.com>
 <slobodan.prijic@imgtec.com> <Slobodan.Prijic@imgtec.com>
 <vrabaud@google.com> <vincent.rabaud@gmail.com>
-Vincent Rabaud <vrabaud@google.com>
 Tamar Levy <tamar.levy@intel.com>
 <qrczak@google.com> <qrczak>
 Hui Su <huisu@google.com>
 James Zern <jzern@google.com>
-Roberto Alanis <alanisbaez@google.com>
-Brian Ledger <brianpl@google.com>
-Maryla Ustarroz-Calonge <maryla@google.com>
--- a/5
+++ b/5
@@ -1,15 +1,12 @@
 Contributors:
 - Aidan O'Loan (aidanol at gmail dot com)
 - Alan Browning (browning at google dot com)
- Alexandru Ardelean (ardeleanalex at gmail dot com)
- Brian Ledger (brianpl at google dot com)
 - Charles Munger (clm at google dot com)
 - Cheng Yi (cyi at google dot com)
 - Christian Duvivier (cduvivier at google dot com)
 - Christopher Degawa (ccom at randomderp dot com)
 - Clement Courbet (courbet at google dot com)
 - Djordje Pesut (djordje dot pesut at imgtec dot com)
- Frank Barchard (fbarchard at google dot com)
 - Hui Su (huisu at google dot com)
 - Ilya Kurdyukov (jpegqs at gmail dot com)
 - Ingvar Stepanyan (rreverser at google dot com)
@@ -25,7 +22,6 @@ Contributors:
 - Mans Rullgard (mans at mansr dot com)
 - Marcin Kowalczyk (qrczak at google dot com)
 - Martin Olsson (mnemo at minimum dot se)
- Maryla Ustarroz-Calonge (maryla at google dot com)
 - Mikołaj Zalewski (mikolajz at google dot com)
 - Mislav Bradac (mislavm at google dot com)
 - Nico Weber (thakis at chromium dot org)
@@ -36,7 +32,6 @@ Contributors:
 - Pascal Massimino (pascal dot massimino at gmail dot com)
 - Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
 - Pierre Joye (pierre dot php at gmail dot com)
- Roberto Alanis (alanisbaez at google dot com)
 - Sam Clegg (sbc at chromium dot org)
 - Scott Hancher (seh at google dot com)
 - Scott LaVarnway (slavarnway at google dot com)
--- a/Android.mk
+++ b/Android.mk
@@ -33,14 +33,6 @@ else
  NEON := c
 endif

-sharpyuv_srcs := \
-    sharpyuv/sharpyuv.c \
-    sharpyuv/sharpyuv_csp.c \
-    sharpyuv/sharpyuv_dsp.c \
-    sharpyuv/sharpyuv_gamma.c \
-    sharpyuv/sharpyuv_neon.$(NEON) \
-    sharpyuv/sharpyuv_sse2.c \
-
 dec_srcs := \
    src/dec/alpha_dec.c \
    src/dec/buffer_dec.c \
@@ -212,7 +204,6 @@ endif  # ENABLE_SHARED=1
 include $(CLEAR_VARS)

 LOCAL_SRC_FILES := \
-    $(sharpyuv_srcs) \
    $(dsp_enc_srcs) \
    $(enc_srcs) \
    $(utils_enc_srcs) \
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,14 +15,6 @@ endif()
 project(WebP C)

 # Options for coder / decoder executables.
-if(BUILD_SHARED_LIBS)
-  set(WEBP_LINK_STATIC_DEFAULT OFF)
-else()
-  set(WEBP_LINK_STATIC_DEFAULT ON)
-endif()
-option(WEBP_LINK_STATIC
-       "Link using static libraries. If OFF, use dynamic libraries."
-       ${WEBP_LINK_STATIC_DEFAULT})
 if(NOT EMSCRIPTEN)
  # Disable SIMD on Emscripten by default, as it's a new unstable Wasm feature.
  # Users can still explicitly opt-in to make a SIMD-enabled build.
@@ -30,6 +22,9 @@ if(NOT EMSCRIPTEN)
 endif()
 option(WEBP_ENABLE_SIMD "Enable any SIMD optimization."
       ${WEBP_ENABLE_SIMD_DEFAULT})
+# Emscripten supports SSE builds using its compatibility headers; by default it
+# will use SSE4 if WEBP_ENABLE_WASM_SIMD is OFF and WEBP_ENABLE_SIMD is ON.
+option(WEBP_ENABLE_WASM_SIMD "Enable WebAssembly SIMD optimizations" OFF)
 option(WEBP_BUILD_ANIM_UTILS "Build animation utilities." ON)
 option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." ON)
 option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." ON)
@@ -48,18 +43,6 @@ option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces."
 set(WEBP_BITTRACE "0" CACHE STRING "Bit trace mode (0=none, 1=bit, 2=bytes)")
 set_property(CACHE WEBP_BITTRACE PROPERTY STRINGS 0 1 2)

-if(WEBP_LINK_STATIC)
-  if(WIN32)
-    SET(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
-  else()
-    SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
-  endif()
-  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-  # vwebp does not compile on Ubuntu with static libraries so disabling it for
-  # now.
-  set(WEBP_BUILD_VWEBP OFF)
-endif()
-
 # Option needed for handling Unicode file names on Windows.
 if(WIN32)
  option(WEBP_UNICODE "Build Unicode executables." ON)
@@ -68,7 +51,6 @@ endif()
 if(WEBP_BUILD_WEBP_JS)
  set(WEBP_BUILD_ANIM_UTILS OFF)
  set(WEBP_BUILD_CWEBP OFF)
-  set(WEBP_BUILD_DWEBP OFF)
  set(WEBP_BUILD_GIF2WEBP OFF)
  set(WEBP_BUILD_IMG2WEBP OFF)
  set(WEBP_BUILD_VWEBP OFF)
@@ -100,6 +82,9 @@ include(GNUInstallDirs)
 if(WEBP_ENABLE_SWAP_16BIT_CSP)
  add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
 endif()
+if(WEBP_ENABLE_WASM_SIMD)
+  add_definitions(-DWEBP_ENABLE_WASM_SIMD_INTRINSICS)
+endif()

 if(NOT WEBP_BITTRACE STREQUAL "0")
  add_definitions(-DBITTRACE=${WEBP_BITTRACE})
@@ -117,8 +102,6 @@ set(includedir "\$\{prefix\}/include")
 set(PTHREAD_LIBS ${CMAKE_THREAD_LIBS_INIT})
 set(INSTALLED_LIBRARIES)

-set(CMAKE_C_VISIBILITY_PRESET hidden)
-
 # ##############################################################################
 # Android only.
 if(ANDROID)
@@ -233,18 +216,6 @@ function(libwebp_add_stub_file TARGET)
  target_sources(${TARGET} PRIVATE ${stub_source_file})
 endfunction()

-parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv "WEBP_SHARPYUV_SRCS"
-                  "")
-add_library(sharpyuv OBJECT ${WEBP_SHARPYUV_SRCS})
-target_include_directories(sharpyuv
-                            PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
-                            ${CMAKE_CURRENT_SOURCE_DIR})
-set_target_properties(
-  sharpyuv
-  PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv/sharpyuv.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv/sharpyuv_csp.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")
-
 if(MSVC)
  # avoid security warnings for e.g., fopen() used in the examples.
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
@@ -309,7 +280,6 @@ target_include_directories(webputils
                           PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
                                   ${CMAKE_CURRENT_SOURCE_DIR})
 add_library(webp
-            $<TARGET_OBJECTS:sharpyuv>
            $<TARGET_OBJECTS:webpdecode>
            $<TARGET_OBJECTS:webpdsp>
            $<TARGET_OBJECTS:webpencode>
@@ -332,8 +302,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")

 # Make sure the OBJECT libraries are built with position independent code (it is
 # not ON by default).
-set_target_properties(sharpyuv
-                      webpdecode
+set_target_properties(webpdecode
                      webpdspdecode
                      webputilsdecode
                      webpencode
@@ -447,14 +416,13 @@ if(WEBP_BUILD_ANIM_UTILS
   OR WEBP_BUILD_DWEBP
   OR WEBP_BUILD_GIF2WEBP
   OR WEBP_BUILD_IMG2WEBP
-   OR WEBP_BUILD_VWEBP
-   OR WEBP_BUILD_WEBPMUX
-   OR WEBP_BUILD_WEBPINFO)
+   OR WEBP_BUILD_VWEBP)
  # Example utility library.
  parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "EXAMPLEUTIL_SRCS"
                    "example_util_[^ ]*")
  list(APPEND EXAMPLEUTIL_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
  add_library(exampleutil STATIC ${EXAMPLEUTIL_SRCS})
+  target_link_libraries(exampleutil imageioutil)
  target_include_directories(
    exampleutil
    PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>)
@@ -463,7 +431,6 @@ if(WEBP_BUILD_ANIM_UTILS
                    "imageio_util_[^ ]*")
  add_library(imageioutil STATIC ${IMAGEIOUTILS_SRCS})
  target_link_libraries(imageioutil webp)
-  target_link_libraries(exampleutil imageioutil)

  # Image-decoding utility library.
  parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/imageio "IMAGEDEC_SRCS"
@@ -509,18 +476,19 @@ endif()

 if(WEBP_BUILD_LIBWEBPMUX)
  parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/mux "WEBP_MUX_SRCS" "")
-  add_library(webpmux ${WEBP_MUX_SRCS})
-  target_link_libraries(webpmux webp)
-  target_include_directories(webpmux
+  add_library(libwebpmux ${WEBP_MUX_SRCS})
+  target_link_libraries(libwebpmux webp)
+  target_include_directories(libwebpmux
                             PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
                                     ${CMAKE_CURRENT_SOURCE_DIR})
-  set_version(mux/Makefile.am webpmux webpmux)
-  set_target_properties(webpmux
+  set_version(mux/Makefile.am libwebpmux webpmux)
+  set_target_properties(libwebpmux
                        PROPERTIES PUBLIC_HEADER
                                   "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux.h;\
 ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h;\
 ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h;")
-  list(APPEND INSTALLED_LIBRARIES webpmux)
+  set_target_properties(libwebpmux PROPERTIES OUTPUT_NAME webpmux)
+  list(APPEND INSTALLED_LIBRARIES libwebpmux)
  configure_pkg_config("src/mux/libwebpmux.pc")
 endif()

@@ -534,7 +502,7 @@ if(WEBP_BUILD_GIF2WEBP)
                        exampleutil
                        imageioutil
                        webp
-                        webpmux
+                        libwebpmux
                        ${WEBP_DEP_GIF_LIBRARIES})
  target_include_directories(gif2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
  install(TARGETS gif2webp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
@@ -551,7 +519,7 @@ if(WEBP_BUILD_IMG2WEBP)
                        imagedec
                        imageioutil
                        webp
-                        webpmux)
+                        libwebpmux)
  target_include_directories(img2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
  install(TARGETS img2webp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
@@ -591,9 +559,7 @@ if(WEBP_BUILD_WEBPINFO)
                    "webpinfo")
  add_executable(webpinfo ${WEBPINFO_SRCS})
  target_link_libraries(webpinfo exampleutil imageioutil)
-  target_include_directories(webpinfo
-                             PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src
-                                     ${CMAKE_CURRENT_SOURCE_DIR}/src)
+  target_include_directories(webpinfo PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
  install(TARGETS webpinfo RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()

@@ -601,12 +567,10 @@ if(WEBP_BUILD_WEBPMUX)
  # webpmux
  parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "WEBPMUX_SRCS"
                    "webpmux")
-  add_executable(webpmux_app ${WEBPMUX_SRCS})
-  set_target_properties(webpmux_app PROPERTIES OUTPUT_NAME webpmux)
-  target_link_libraries(webpmux_app exampleutil imageioutil webpmux webp)
-  target_include_directories(webpmux_app
-                             PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
-  install(TARGETS webpmux_app RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(webpmux ${WEBPMUX_SRCS})
+  target_link_libraries(webpmux exampleutil imageioutil libwebpmux webp)
+  target_include_directories(webpmux PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
+  install(TARGETS webpmux RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()

 if(WEBP_BUILD_EXTRAS)
@@ -616,13 +580,6 @@ if(WEBP_BUILD_EXTRAS)
  parse_makefile_am(${EXTRAS_MAKEFILE} "WEBP_QUALITY_SRCS" "webp_quality")
  parse_makefile_am(${EXTRAS_MAKEFILE} "VWEBP_SDL_SRCS" "vwebp_sdl")

-  # libextras
-  add_library(extras STATIC ${WEBP_EXTRAS_SRCS})
-  target_include_directories(extras
-                             PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
-                             ${CMAKE_CURRENT_SOURCE_DIR}
-                             ${CMAKE_CURRENT_SOURCE_DIR}/src)
-
  # get_disto
  add_executable(get_disto ${GET_DISTO_SRCS})
  target_link_libraries(get_disto imagedec)
@@ -631,15 +588,15 @@ if(WEBP_BUILD_EXTRAS)
                                     ${CMAKE_CURRENT_BINARY_DIR}/src)

  # webp_quality
-  add_executable(webp_quality ${WEBP_QUALITY_SRCS})
-  target_link_libraries(webp_quality exampleutil imagedec extras)
+  add_executable(webp_quality ${WEBP_QUALITY_SRCS} ${WEBP_EXTRAS_SRCS})
+  target_link_libraries(webp_quality exampleutil imagedec)
  target_include_directories(webp_quality
                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
                                     ${CMAKE_CURRENT_BINARY_DIR})

  # vwebp_sdl
  find_package(SDL)
-  if(WEBP_BUILD_VWEBP AND SDL_FOUND)
+  if(SDL_FOUND)
    add_executable(vwebp_sdl ${VWEBP_SDL_SRCS})
    target_link_libraries(vwebp_sdl ${SDL_LIBRARY} imageioutil webp)
    target_include_directories(vwebp_sdl
--- a/171
+++ b/171
@@ -1,176 +1,7 @@
-56a480e8 dsp/cpu.h: add missing extern "C"
-62b45bdd update ChangeLog (tag: v1.2.3-rc1)
-8764ec7a Merge changes Idb037953,Id582e395 into 1.2.3
-bcb872c3 vwebp: fix file name display in windows unicode build
-67c44ac5 webpmux: fix -frame option in windows unicode build
-8278825a makefile.unix: add sharpyuv objects to clean target
-14a49e01 update NEWS
-34b1dc33 bump version to 1.2.3
-0b397fda update AUTHORS
-c16488ac update .mailmap
-5a2d929c Merge "unicode.h: set console mode before using wprintf" into main
-169f867f unicode.h: set console mode before using wprintf
-a94b855c Merge "libsharpyuv: add version defines" into main
-f83bdb52 libsharpyuv: add version defines
-bef0d797 unicode_gif.h: fix -Wdeclaration-after-statement
-404c1622 Rename Huffman coding to prefix coding in the bitstream spec
-8895f8a3 Merge "run_static_analysis.sh: fix scan-build archive path" into main
-92a673d2 Merge "Add -fvisibility=hidden flag in CMakeLists." into main
-67c1d722 Merge "add WEBP_MSAN" into main
-1124ff66 Add -fvisibility=hidden flag in CMakeLists.
-e15b3560 add WEBP_MSAN
-ec9e782a sharpyuv: remove minimum image size from sharpyuv library
-7bd07f3b run_static_analysis.sh: fix scan-build archive path
-5ecee06f Merge "sharpyuv: increase precision of gamma<->linear conversion" into main
-f81dd7d6 Merge changes I3d17d529,I53026880,I1bd61639,I6bd4b25d,Icfec8fba into main
-2d607ee6 sharpyuv: increase precision of gamma<->linear conversion
-266cbbc5 sharpyuv: add 32bit version of SharpYuvFilterRow.
-9fc12274 CMake: add src to webpinfo includes
-7d18f40a CMake: add WEBP_BUILD_WEBPINFO to list of checks for exampleutil
-11309aa5 CMake: add WEBP_BUILD_WEBPMUX to list of checks for exampleutil
-4bc762f7 CMake: link imageioutil to exampleutil after defined
-0d1b9bc4 WEBP_DEP_LIBRARIES: use Threads::Threads
-20ef48f0 Merge "sharpyuv: add support for 10/12/16 bit rgb and 10/12 bit yuv." into main
-93c54371 sharpyuv: add support for 10/12/16 bit rgb and 10/12 bit yuv.
-53cf2b49 normalize WebPValidatePicture declaration w/definition
-d3006f4b sharpyuv: slightly improve precision
-ea967098 Merge changes Ia01bd397,Ibf3771af into main
-11bc8410 Merge changes I2d317c4b,I9e77f6db into main
-30453ea4 Add an internal WebPValidatePicture.
-6c43219a Some renamings for consistency.
-4f59fa73 update .mailmap
-e74f8a62 webp-lossless-bitstream-spec,cosmetics: normalize range syntax
-5a709ec0 webp-lossless-bitstream-spec,cosmetics: fix code typo
-a2093acc webp-lossless-bitstream-spec: add amendment note
-86c66930 webp-lossless-bitstream-spec: fix BNF
-232f22da webp-lossless-bitstream-spec: fix 'simple code' snippet
-44dd765d webp-lossless-bitstream-spec: fix ColorTransform impl
-7a7e33e9 webp-lossless-bitstream-spec: fix TR-pixel right border note
-86f94ee0 Update lossless spec with Huffman codes.
-a3927cc8 sharpyuv.c,cosmetics: fix indent
-6c45cef7 Make sure the stride has a minimum value in the importer.
-0c8b0e67 sharpyuv: cleanup/cosmetic changes
-dc3841e0 {histogram,predictor}_enc: quiet int -> float warnings
-a19a25bb Replace doubles by floats in lossless misc cost estimations.
-42888f6c Add an option to enable static builds.
-7efcf3cc Merge "Fix typo in color constants: Marix -> Matrix" into main
-8f4b5c62 Fix typo in color constants: Marix -> Matrix
-90084d84 Merge "demux,IsValidExtendedFormat: remove unused variable" into main
-ed643f61 Merge changes I452d2485,Ic6d75475 into main
-8fa053d1 Rename SharpYUV to SharpYuv for consistency.
-99a87562 SharpYuvComputeConversionMatrix: quiet int->float warnings
-deb426be Makefile.vc: add sharpyuv_csp.obj to SHARPYUV_OBJS
-779597d4 demux,IsValidExtendedFormat: remove unused variable
-40e8aa57 Merge "libsharpyuv: add colorspace utilities" into main
-01a05de1 libsharpyuv: add colorspace utilities
-2de4b05a Merge changes Id9890a60,I376d81e6,I1c958838 into main
-b8bca81f Merge "configure.ac: use LT_INIT if available" into main
-e8e77b9c Merge changes I479bc487,I39864691,I5d486c2c,I186d13be into main
-7e7d5d50 Merge ".gitignore: add Android Studio & VS code dirs" into main
-10c50848 normalize label indent
-89f774e6 mux{edit,internal}: fix leaks on error
-2d3293ad ExUtilInitCommandLineArguments: fix leak on error
-ec34fd70 anim_util: fix leaks on error
-e4717287 gif2webp: fix segfault on OOM
-e3cfafaf GetBackwardReferences: fail on alloc error
-a828a59b BackwardReferencesHashChainDistanceOnly: fix segfault on OOM
-fe153fae VP8LEncodeStream: fix segfault on OOM
-919acc0e .gitignore: add Android Studio & VS code dirs
-efa0731b configure.ac: use LT_INIT if available
-0957fd69 tiffdec: add grayscale support
-e685feef Merge "Make libsharpyuv self-contained by removing dependency on cpu.c" into main
-841960b6 Make libsharpyuv self-contained by removing dependency on cpu.c
-617cf036 image_dec: add WebPGetEnabledInputFileFormats()
-7a68afaa Let SharpArgbToYuv caller pass in an RGB>YUV conversion matrix.
-34bb332c man/cwebp.1: add note about crop/resize order
-f0e9351c webp-lossless-bitstream-spec,cosmetics: fix some typos
-5ccbd6ed vp8l_dec.c,cosmetics: fix a few typos
-c3d0c2d7 fix ios build scripts after sharpyuv dep added
-d0d2292e Merge "Make libwebp depend on libsharpyuv." into main
-03d12190 alpha_processing_neon.c: fix 0x01... typo
-d55d447c Make libwebp depend on libsharpyuv.
-e4cbcdd2 Fix lossless encoding for MIPS.
-924e7ca6 alpha_processing_neon.c: fix Dispatch/ExtractAlpha_NEON
-0fa0ea54 Makefile.vc: use /MANIFEST:EMBED
-29cc95ce Basic version of libsharpyuv in libwebp, in C.
-a30f2190 examples/webpmux.c: fix a couple of typos
-66b3ce23 Fix bad overflow check in ReadTIFF()
-54e61a38 Markdownify libwebp docs and reorganize them.
-b4533deb CMakeLists.txt,cosmetics: break long line
-b9d2f9cd quant_enc.c: use WEBP_RESTRICT qualifier
-ec178f2c Add progress hook granularity in lossless
-26139c73 Rename MAX_COST to MAX_BIT_COST in histogram_enc.c
-13b82816 cmake: fix webpmux lib name for cmake linking
-88b6a396 webp-container-spec.txt,cosmetics: normalize formatting
-6f496540 Merge tag 'v1.2.2'
-4074acf8 dsp.h: bump msvc arm64 version requirement to 16.6
-b0a86089 update ChangeLog (tag: v1.2.2)
-6db8248c libwebp: Fix VP8EncTokenLoop() progress
-827a307f BMP enc: fix the transparency case
-db25f1b4 libwebp: Fix VP8EncTokenLoop() progress
-286e7fce libwebp: do not destroy jpeg codec twice on error
-6e8a4126 libwebp: do not destroy jpeg codec twice on error
-faf21968 Merge "BMP enc: fix the transparency case" into main
-480cd51d BMP enc: fix the transparency case
-9195ea05 update ChangeLog (tag: v1.2.2-rc2)
-4acae017 update NEWS
-883f0633 man/img2webp.1: update date
-567e1f44 Reword img2webp synopsis command line
-1b0c15db man/img2webp.1: update date
-17bade38 Merge "Reword img2webp synopsis command line" into main
-a80954a1 Reword img2webp synopsis command line
-f084244d anim_decode: fix alpha blending with big-endian
-b217b4ff webpinfo: fix fourcc comparison w/big-endian
-ec497b75 Merge "anim_decode: fix alpha blending with big-endian" into main
-e4886716 anim_decode: fix alpha blending with big-endian
-e3cb052c webpinfo: fix fourcc comparison w/big-endian
-a510fedb patch-check: detect duplicated files
-f035d2e4 update ChangeLog (tag: v1.2.2-rc1)
-7031946a update NEWS
-973390b6 bump version to 1.2.2
-abd6664f update AUTHORS
-5b7e7930 Merge "add missing USE_{MSA,NEON} checks in headers" into main
-02ca04c3 add missing USE_{MSA,NEON} checks in headers
-e94716e2 xcframeworkbuild.sh: place headers in a subdir
-c846efd8 patch-check: commit subject length check
-b6f756e8 update http links
-8f5cb4c1 update rfc links
-8ea81561 change VP8LPredictorFunc signature to avoid reading 'left'
-6b1d18c3 webpmux: fix the -bgcolor description
-3368d876 Merge "webpmux: add "-set bgcolor A,R,G,B"" into main
-f213abf6 webpinfo: print the number of warnings
-50c97c30 webpmux: add "-set bgcolor A,R,G,B"
-2c206aaf Remove CMakeLists.txt check in compile.sh
-96e3dfef Merge "infra/common.sh: add shard_should_run()" into main
-0e0f74b7 infra/common.sh: add shard_should_run()
-35b7436a Jenkins scripts port: update shell function comments
-21d24b4c webp-container-spec.txt: remove 'experimental' markers
-cdcf8902 Merge "Port Jenkins script: compile" into main
-dc683cde Jenkins scripts port: static analysis
-0858494e Port Jenkins script: compile
-c2cf6a93 Jenkins scripts port: android compilation
-df0e808f presubmit: Add pylint-2.7 and .pylintrc
-676c57db patch-check: shfmt
-7bb7f747 patch-check: Add shellcheck
-abcd1797 Reformat docstrings and imports
-edaf0895 Port Jenkins scripts: compile js
-b9622063 Set CheckPatchFormatted flags to fail on diffs
-e23cd548 dsp.h: enable NEON w/VS2019+ ARM64 targets
-3875c7de CMakeLists.txt: set minimum version to 3.7
-1a8f0d45 Have a hard-coded value for memset in TrellisQuantizeBlock.
-93480160 Speed up TrellisQuantizeBlock
-45eaacc9 Convert deprecated uint32 to uint32_t.
-42592af8 webp,cmake: Remove unnecessary include dirs
-e298e05f Add patch-check steps in PRESUBMIT.py
-29148919 Merge tag 'v1.2.1'
-9ce5843d update ChangeLog (tag: v1.2.1)
 d9191588 fuzzer/*: normalize src/ includes
-c5bc3624 fuzzer/*: normalize src/ includes
 53b6f762 fix indent
-d2caaba4 fix indent
 731246ba update ChangeLog (tag: v1.2.1-rc2)
 d250f01d dsp/*: use WEBP_HAVE_* to determine Init availability
-1fe31625 dsp/*: use WEBP_HAVE_* to determine Init availability
 3a4d3ecd update NEWS
 b2bc8093 bump version to 1.2.1
 e542fc7a update AUTHORS
@@ -342,7 +173,7 @@ a99078c1 remove call to MBAnalyzeBestIntra4Mode for method >= 5
 6a0ff358 Enc: add a qmin / qmax range for quality factor
 0fa56f30 Merge tag 'v1.1.0'
 6cf504d0 PNM decoding: handle max_value != 255
-d7844e97 update ChangeLog (tag: v1.1.0-rc2, tag: v1.1.0)
+d7844e97 update ChangeLog (tag: v1.1.0-rc2, tag: v1.1.0, origin/1.1.0)
 7f006436 Makefile.vc: fix webp_quality.exe link
 cf047e83 Makefile.vc: fix webp_quality.exe link
 c074c653 update NEWS
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,5 +1,5 @@
 ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = sharpyuv src imageio man
+SUBDIRS = src imageio man
 EXTRA_DIST = COPYING autogen.sh

 if BUILD_EXTRAS
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -31,11 +31,12 @@ CCNODBG    = cl.exe $(NOLOGO) /O2 /DNDEBUG
 CCDEBUG    = cl.exe $(NOLOGO) /Od /Zi /D_DEBUG /RTC1
 CFLAGS     = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
 CFLAGS     = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
-LDFLAGS    = /LARGEADDRESSAWARE /MANIFEST:EMBED /NXCOMPAT /DYNAMICBASE
+LDFLAGS    = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
 LDFLAGS    = $(LDFLAGS) $(PLATFORM_LDFLAGS)
 LNKDLL     = link.exe /DLL $(NOLOGO)
 LNKEXE     = link.exe $(NOLOGO)
 LNKLIB     = lib.exe $(NOLOGO)
+MT         = mt.exe $(NOLOGO)
 RCNODBG    = rc.exe $(NOLOGO) /l"0x0409"  # 0x409 = U.S. English
 RCDEBUG    = $(RCNODBG) /D_DEBUG

@@ -81,7 +82,6 @@ OUTPUT_DIRS = $(DIRBIN) $(DIRINC) $(DIRLIB) \
              $(DIROBJ)\extras \
              $(DIROBJ)\imageio \
              $(DIROBJ)\mux \
-              $(DIROBJ)\sharpyuv \
              $(DIROBJ)\utils \

 # Target configuration
@@ -174,14 +174,6 @@ CFLAGS = $(CFLAGS) /D_UNICODE /DUNICODE
 # A config was provided, so the library can be built.
 #

-SHARPYUV_OBJS = \
-    $(DIROBJ)\sharpyuv\sharpyuv.obj \
-    $(DIROBJ)\sharpyuv\sharpyuv_csp.obj \
-    $(DIROBJ)\sharpyuv\sharpyuv_dsp.obj \
-    $(DIROBJ)\sharpyuv\sharpyuv_gamma.obj \
-    $(DIROBJ)\sharpyuv\sharpyuv_neon.obj \
-    $(DIROBJ)\sharpyuv\sharpyuv_sse2.obj \
-
 DEC_OBJS = \
    $(DIROBJ)\dec\alpha_dec.obj \
    $(DIROBJ)\dec\buffer_dec.obj \
@@ -343,8 +335,8 @@ UTILS_ENC_OBJS = \
    $(DIROBJ)\utils\quant_levels_utils.obj \

 LIBWEBPDECODER_OBJS = $(DEC_OBJS) $(DSP_DEC_OBJS) $(UTILS_DEC_OBJS)
-LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(SHARPYUV_OBJS) $(ENC_OBJS) \
-               $(DSP_ENC_OBJS) $(UTILS_ENC_OBJS) $(DLL_OBJS)
+LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) $(DSP_ENC_OBJS) \
+               $(UTILS_ENC_OBJS) $(DLL_OBJS)
 LIBWEBPMUX_OBJS = $(MUX_OBJS) $(LIBWEBPMUX_OBJS)
 LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS) $(LIBWEBPDEMUX_OBJS)

@@ -490,8 +482,6 @@ $(DIROBJ)\examples\gifdec.obj: examples\gifdec.c
 	$(CC) $(CFLAGS) /Fd$(DIROBJ)\extras\ /Fo$(DIROBJ)\extras\ $<
 {imageio}.c{$(DIROBJ)\imageio}.obj::
 	$(CC) $(CFLAGS) /Fd$(DIROBJ)\imageio\ /Fo$(DIROBJ)\imageio\ $<
-{sharpyuv}.c{$(DIROBJ)\sharpyuv}.obj::
-	$(CC) $(CFLAGS) /Fd$(DIROBJ)\sharpyuv\ /Fo$(DIROBJ)\sharpyuv\ $<
 {src\dec}.c{$(DIROBJ)\dec}.obj::
 	$(CC) $(CFLAGS) /Fd$(LIBWEBP_PDBNAME) /Fo$(DIROBJ)\dec\ $<
 {src\demux}.c{$(DIROBJ)\demux}.obj::
@@ -512,9 +502,13 @@ LNKLIBS     = $(LNKLIBS) Shell32.lib

 {$(DIROBJ)\examples}.obj{$(DIRBIN)}.exe:
 	$(LNKEXE) $(LDFLAGS) /OUT:$@ $** $(LNKLIBS)
+	$(MT) -manifest $@.manifest -outputresource:$@;1
+	del $@.manifest

 {$(DIROBJ)\extras}.obj{$(DIRBIN)}.exe:
 	$(LNKEXE) $(LDFLAGS) /OUT:$@ $** $(LNKLIBS)
+	$(MT) -manifest $@.manifest -outputresource:$@;1
+	del $@.manifest

 clean::
 	@-erase /s $(DIROBJ)\*.dll 2> NUL
--- a/16
+++ b/16
@@ -1,19 +1,3 @@
- 6/30/2022: version 1.2.3
-  This is a binary compatible release.
-  * security fix for lossless encoder (#565, chromium:1313709)
-  * improved progress granularity in WebPReportProgress() when using lossless
-  * improved precision in Sharp YUV (-sharp_yuv) conversion
-  * many corrections to webp-lossless-bitstream-spec.txt (#551)
-  * crash/leak fixes on error/OOM and other bug fixes (#558, #563, #569, #573)
-
- 1/11/2022: version 1.2.2
-  This is a binary compatible release.
-  * webpmux: add "-set bgcolor A,R,G,B"
-  * add ARM64 NEON support for MSVC builds (#539)
-  * fix duplicate include error in Xcode when using multiple XCFrameworks in a
-    project (#542)
-  * doc updates and bug fixes (#538, #544, #548, #550)
-
 - 7/20/2021: version 1.2.1
  This is a binary compatible release.
  * minor lossless encoder improvements and x86 color conversion speed up
--- a/PRESUBMIT.py
+++ b/PRESUBMIT.py
@@ -33,75 +33,15 @@ See https://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts for
 details on the presubmit API built into depot_tools.
 """

-import re
 import subprocess2

 USE_PYTHON3 = True
 _BASH_INDENTATION = "2"
-_GIT_COMMIT_SUBJECT_LENGTH = 65
 _INCLUDE_BASH_FILES_ONLY = [r".*\.sh$"]
 _INCLUDE_MAN_FILES_ONLY = [r"man/.+\.1$"]
-_INCLUDE_SOURCE_FILES_ONLY = [r".*\.[ch]$"]
 _LIBWEBP_MAX_LINE_LENGTH = 80


-def _CheckCommitSubjectLength(input_api, output_api):
-  """Ensures commit's subject length is no longer than 65 chars."""
-  name = "git-commit subject"
-  cmd = ["git", "log", "-1", "--pretty=%s"]
-  start = input_api.time.time()
-  proc = subprocess2.Popen(
-      cmd,
-      stderr=subprocess2.PIPE,
-      stdout=subprocess2.PIPE,
-      universal_newlines=True)
-
-  stdout, _ = proc.communicate()
-  duration = input_api.time.time() - start
-
-  if not re.match(r"^Revert",
-                  stdout) and (len(stdout) - 1) > _GIT_COMMIT_SUBJECT_LENGTH:
-    failure_msg = (
-        "The commit subject: %s is too long (%d chars)\n"
-        "Try to keep this to 50 or less (up to 65 is permitted for "
-        "non-reverts).\n"
-        "https://www.git-scm.com/book/en/v2/Distributed-Git-Contributing-to-a-"
-        "Project#_commit_guidelines") % (stdout, len(stdout) - 1)
-    return output_api.PresubmitError("%s\n (%4.2fs) failed\n%s" %
-                                     (name, duration, failure_msg))
-
-  return output_api.PresubmitResult("%s\n (%4.2fs) success" % (name, duration))
-
-
-def _CheckDuplicateFiles(input_api, output_api):
-  """Ensures there are not repeated filenames."""
-  all_files = []
-  for f in input_api.change.AllFiles():
-    for include_file in _INCLUDE_SOURCE_FILES_ONLY:
-      if re.match(include_file, f):
-        all_files.append(f)
-        break
-
-  basename_to_path = {}
-  for f in all_files:
-    basename_file = input_api.basename(f)
-    if basename_file in basename_to_path:
-      basename_to_path[basename_file].append(f)
-    else:
-      basename_to_path[basename_file] = [f]
-
-  dupes = []
-  for files in basename_to_path.values():
-    if len(files) > 1:
-      dupes.extend(files)
-
-  if dupes:
-    return output_api.PresubmitError(
-        "Duplicate source files, rebase or rename some to make them unique:\n%s"
-        % dupes)
-  return output_api.PresubmitResult("No duplicates, success\n")
-
-
 def _GetFilesToSkip(input_api):
  return list(input_api.DEFAULT_FILES_TO_SKIP) + [
      r"swig/.*\.py$",
@@ -183,8 +123,6 @@ def _CommonChecks(input_api, output_api):
  results.extend(
      input_api.canned_checks.CheckChangeHasNoStrayWhitespace(
          input_api, output_api))
-  results.append(_CheckCommitSubjectLength(input_api, output_api))
-  results.append(_CheckDuplicateFiles(input_api, output_api))

  source_file_filter = lambda x: input_api.FilterSourceFile(
      x, files_to_skip=_GetFilesToSkip(input_api))
--- a/795
+++ b/795
@@ -0,0 +1,795 @@
+          __   __  ____  ____  ____
+         /  \\/  \/  _ \/  _ )/  _ \
+         \       /   __/  _  \   __/
+          \__\__/\____/\_____/__/ ____  ___
+                / _/ /    \    \ /  _ \/ _/
+               /  \_/   / /   \ \   __/  \__
+               \____/____/\_____/_____/____/v1.2.1
+
+Description:
+============
+
+WebP codec: library to encode and decode images in WebP format. This package
+contains the library that can be used in other programs to add WebP support,
+as well as the command line tools 'cwebp' and 'dwebp'.
+
+See http://developers.google.com/speed/webp
+
+The latest source tree is available at
+https://chromium.googlesource.com/webm/libwebp
+
+It is released under the same license as the WebM project.
+See http://www.webmproject.org/license/software/ or the
+"COPYING" file for details. An additional intellectual
+property rights grant can be found in the file PATENTS.
+
+Building:
+=========
+
+Windows build:
+--------------
+
+By running:
+
+  nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output
+
+the directory output\release-static\(x64|x86)\bin will contain the tools
+cwebp.exe and dwebp.exe. The directory output\release-static\(x64|x86)\lib will
+contain the libwebp static library.
+The target architecture (x86/x64) is detected by Makefile.vc from the Visual
+Studio compiler (cl.exe) available in the system path.
+
+Unix build using makefile.unix:
+-------------------------------
+
+On platforms with GNU tools installed (gcc and make), running
+
+  make -f makefile.unix
+
+will build the binaries examples/cwebp and examples/dwebp, along
+with the static library src/libwebp.a. No system-wide installation
+is supplied, as this is a simple alternative to the full installation
+system based on the autoconf tools (see below).
+Please refer to makefile.unix for additional details and customizations.
+
+Using autoconf tools:
+---------------------
+Prerequisites:
+A compiler (e.g., gcc), make, autoconf, automake, libtool.
+On a Debian-like system the following should install everything you need for a
+minimal build:
+$ sudo apt-get install gcc make autoconf automake libtool
+
+When building from git sources, you will need to run autogen.sh to generate the
+configure script.
+
+./configure
+make
+make install
+
+should be all you need to have the following files
+
+/usr/local/include/webp/decode.h
+/usr/local/include/webp/encode.h
+/usr/local/include/webp/types.h
+/usr/local/lib/libwebp.*
+/usr/local/bin/cwebp
+/usr/local/bin/dwebp
+
+installed.
+
+Note: A decode-only library, libwebpdecoder, is available using the
+'--enable-libwebpdecoder' flag. The encode library is built separately and can
+be installed independently using a minor modification in the corresponding
+Makefile.am configure files (see comments there). See './configure --help' for
+more options.
+
+Building for MIPS Linux:
+------------------------
+Stable releases of the MIPS Linux toolchain can be found at:
+https://community.imgtec.com/developers/mips/tools/codescape-mips-sdk/available-releases/
+
+# Add toolchain to PATH
+export PATH=$PATH:/path/to/toolchain/bin
+
+# 32-bit build for mips32r5 (p5600)
+HOST=mips-mti-linux-gnu
+MIPS_CFLAGS="-O3 -mips32r5 -mabi=32 -mtune=p5600 -mmsa -mfp64 \
+  -msched-weight -mload-store-pairs -fPIE"
+MIPS_LDFLAGS="-mips32r5 -mabi=32 -mmsa -mfp64 -pie"
+
+# 64-bit build for mips64r6 (i6400)
+HOST=mips-img-linux-gnu
+MIPS_CFLAGS="-O3 -mips64r6 -mabi=64 -mtune=i6400 -mmsa -mfp64 \
+  -msched-weight -mload-store-pairs -fPIE"
+MIPS_LDFLAGS="-mips64r6 -mabi=64 -mmsa -mfp64 -pie"
+
+./configure --host=${HOST} --build=`config.guess` \
+  CC="${HOST}-gcc -EL" \
+  CFLAGS="$MIPS_CFLAGS" \
+  LDFLAGS="$MIPS_LDFLAGS"
+make
+make install
+
+CMake:
+------
+With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp, webpinfo
+and the JS bindings.
+
+Prerequisites:
+A compiler (e.g., gcc with autotools) and CMake.
+On a Debian-like system the following should install everything you need for a
+minimal build:
+$ sudo apt-get install build-essential cmake
+
+When building from git sources, you will need to run cmake to generate the
+makefiles.
+
+mkdir build && cd build && cmake ../
+make
+make install
+
+If you also want any of the executables, you will need to enable them through
+CMake, e.g.:
+
+cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../
+
+or through your favorite interface (like ccmake or cmake-qt-gui).
+
+Use option -DWEBP_UNICODE=ON for Unicode support on Windows (with chcp 65001).
+
+Finally, once installed, you can also use WebP in your CMake project by doing:
+
+find_package(WebP)
+
+which will define the CMake variables WebP_INCLUDE_DIRS and WebP_LIBRARIES.
+
+Gradle:
+-------
+The support for Gradle is minimal: it only helps you compile libwebp, cwebp,
+dwebp and webpmux_example.
+
+Prerequisites:
+A compiler (e.g., gcc with autotools) and gradle.
+On a Debian-like system the following should install everything you need for a
+minimal build:
+$ sudo apt-get install build-essential gradle
+
+When building from git sources, you will need to run the Gradle wrapper with the
+appropriate target, e.g. :
+
+./gradlew buildAllExecutables
+
+SWIG bindings:
+--------------
+
+To generate language bindings from swig/libwebp.swig at least swig-1.3
+(http://www.swig.org) is required.
+
+Currently the following functions are mapped:
+Decode:
+  WebPGetDecoderVersion
+  WebPGetInfo
+  WebPDecodeRGBA
+  WebPDecodeARGB
+  WebPDecodeBGRA
+  WebPDecodeBGR
+  WebPDecodeRGB
+
+Encode:
+  WebPGetEncoderVersion
+  WebPEncodeRGBA
+  WebPEncodeBGRA
+  WebPEncodeRGB
+  WebPEncodeBGR
+  WebPEncodeLosslessRGBA
+  WebPEncodeLosslessBGRA
+  WebPEncodeLosslessRGB
+  WebPEncodeLosslessBGR
+
+See swig/README for more detailed build instructions.
+
+Java bindings:
+
+To build the swig-generated JNI wrapper code at least JDK-1.5 (or equivalent)
+is necessary for enum support. The output is intended to be a shared object /
+DLL that can be loaded via System.loadLibrary("webp_jni").
+
+Python bindings:
+
+To build the swig-generated Python extension code at least Python 2.6 is
+required. Python < 2.6 may build with some minor changes to libwebp.swig or the
+generated code, but is untested.
+
+Encoding tool:
+==============
+
+The examples/ directory contains tools for encoding (cwebp) and
+decoding (dwebp) images.
+
+The easiest use should look like:
+  cwebp input.png -q 80 -o output.webp
+which will convert the input file to a WebP file using a quality factor of 80
+on a 0->100 scale (0 being the lowest quality, 100 being the best. Default
+value is 75).
+You might want to try the -lossless flag too, which will compress the source
+(in RGBA format) without any loss. The -q quality parameter will in this case
+control the amount of processing time spent trying to make the output file as
+small as possible.
+
+A longer list of options is available using the -longhelp command line flag:
+
+> cwebp -longhelp
+Usage:
+ cwebp [-preset <...>] [options] in_file [-o out_file]
+
+If input size (-s) for an image is not specified, it is
+assumed to be a PNG, JPEG, TIFF or WebP file.
+Note: Animated PNG and WebP files are not supported.
+
+Options:
+  -h / -help ............. short help
+  -H / -longhelp ......... long help
+  -q <float> ............. quality factor (0:small..100:big), default=75
+  -alpha_q <int> ......... transparency-compression quality (0..100),
+                           default=100
+  -preset <string> ....... preset setting, one of:
+                            default, photo, picture,
+                            drawing, icon, text
+     -preset must come first, as it overwrites other parameters
+  -z <int> ............... activates lossless preset with given
+                           level in [0:fast, ..., 9:slowest]
+
+  -m <int> ............... compression method (0=fast, 6=slowest), default=4
+  -segments <int> ........ number of segments to use (1..4), default=4
+  -size <int> ............ target size (in bytes)
+  -psnr <float> .......... target PSNR (in dB. typically: 42)
+
+  -s <int> <int> ......... input size (width x height) for YUV
+  -sns <int> ............. spatial noise shaping (0:off, 100:max), default=50
+  -f <int> ............... filter strength (0=off..100), default=60
+  -sharpness <int> ....... filter sharpness (0:most .. 7:least sharp), default=0
+  -strong ................ use strong filter instead of simple (default)
+  -nostrong .............. use simple filter instead of strong
+  -sharp_yuv ............. use sharper (and slower) RGB->YUV conversion
+  -partition_limit <int> . limit quality to fit the 512k limit on
+                           the first partition (0=no degradation ... 100=full)
+  -pass <int> ............ analysis pass number (1..10)
+  -qrange <min> <max> .... specifies the permissible quality range
+                           (default: 0 100)
+  -crop <x> <y> <w> <h> .. crop picture with the given rectangle
+  -resize <w> <h> ........ resize picture (after any cropping)
+  -mt .................... use multi-threading if available
+  -low_memory ............ reduce memory usage (slower encoding)
+  -map <int> ............. print map of extra info
+  -print_psnr ............ prints averaged PSNR distortion
+  -print_ssim ............ prints averaged SSIM distortion
+  -print_lsim ............ prints local-similarity distortion
+  -d <file.pgm> .......... dump the compressed output (PGM file)
+  -alpha_method <int> .... transparency-compression method (0..1), default=1
+  -alpha_filter <string> . predictive filtering for alpha plane,
+                           one of: none, fast (default) or best
+  -exact ................. preserve RGB values in transparent area, default=off
+  -blend_alpha <hex> ..... blend colors against background color
+                           expressed as RGB values written in
+                           hexadecimal, e.g. 0xc0e0d0 for red=0xc0
+                           green=0xe0 and blue=0xd0
+  -noalpha ............... discard any transparency information
+  -lossless .............. encode image losslessly, default=off
+  -near_lossless <int> ... use near-lossless image
+                           preprocessing (0..100=off), default=100
+  -hint <string> ......... specify image characteristics hint,
+                           one of: photo, picture or graph
+
+  -metadata <string> ..... comma separated list of metadata to
+                           copy from the input to the output if present.
+                           Valid values: all, none (default), exif, icc, xmp
+
+  -short ................. condense printed message
+  -quiet ................. don't print anything
+  -version ............... print version number and exit
+  -noasm ................. disable all assembly optimizations
+  -v ..................... verbose, e.g. print encoding/decoding times
+  -progress .............. report encoding progress
+
+Experimental Options:
+  -jpeg_like ............. roughly match expected JPEG size
+  -af .................... auto-adjust filter strength
+  -pre <int> ............. pre-processing filter
+
+
+The main options you might want to try in order to further tune the
+visual quality are:
+ -preset
+ -sns
+ -f
+ -m
+
+Namely:
+  * 'preset' will set up a default encoding configuration targeting a
+     particular type of input. It should appear first in the list of options,
+     so that subsequent options can take effect on top of this preset.
+     Default value is 'default'.
+  * 'sns' will progressively turn on (when going from 0 to 100) some additional
+     visual optimizations (like: segmentation map re-enforcement). This option
+     will balance the bit allocation differently. It tries to take bits from the
+     "easy" parts of the picture and use them in the "difficult" ones instead.
+     Usually, raising the sns value (at fixed -q value) leads to larger files,
+     but with better quality.
+     Typical value is around '75'.
+  * 'f' option directly links to the filtering strength used by the codec's
+     in-loop processing. The higher the value, the smoother the
+     highly-compressed area will look. This is particularly useful when aiming
+     at very small files. Typical values are around 20-30. Note that using the
+     option -strong/-nostrong will change the type of filtering. Use "-f 0" to
+     turn filtering off.
+  * 'm' controls the trade-off between encoding speed and quality. Default is 4.
+     You can try -m 5 or -m 6 to explore more (time-consuming) encoding
+     possibilities. A lower value will result in faster encoding at the expense
+     of quality.
+
+Decoding tool:
+==============
+
+There is a decoding sample in examples/dwebp.c which will take
+a .webp file and decode it to a PNG image file (amongst other formats).
+This is simply to demonstrate the use of the API. You can verify the
+file test.webp decodes to exactly the same as test_ref.ppm by using:
+
+ cd examples
+ ./dwebp test.webp -ppm -o test.ppm
+ diff test.ppm test_ref.ppm
+
+The full list of options is available using -h:
+
+> dwebp -h
+Usage: dwebp in_file [options] [-o out_file]
+
+Decodes the WebP image file to PNG format [Default].
+Note: Animated WebP files are not supported.
+
+Use following options to convert into alternate image formats:
+  -pam ......... save the raw RGBA samples as a color PAM
+  -ppm ......... save the raw RGB samples as a color PPM
+  -bmp ......... save as uncompressed BMP format
+  -tiff ........ save as uncompressed TIFF format
+  -pgm ......... save the raw YUV samples as a grayscale PGM
+                 file with IMC4 layout
+  -yuv ......... save the raw YUV samples in flat layout
+
+ Other options are:
+  -version ..... print version number and exit
+  -nofancy ..... don't use the fancy YUV420 upscaler
+  -nofilter .... disable in-loop filtering
+  -nodither .... disable dithering
+  -dither <d> .. dithering strength (in 0..100)
+  -alpha_dither  use alpha-plane dithering if needed
+  -mt .......... use multi-threading
+  -crop <x> <y> <w> <h> ... crop output with the given rectangle
+  -resize <w> <h> ......... scale the output (*after* any cropping)
+  -flip ........ flip the output vertically
+  -alpha ....... only save the alpha plane
+  -incremental . use incremental decoding (useful for tests)
+  -h ........... this help message
+  -v ........... verbose (e.g. print encoding/decoding times)
+  -quiet ....... quiet mode, don't print anything
+  -noasm ....... disable all assembly optimizations
+
+WebP file analysis tool:
+========================
+
+'webpinfo' can be used to print out the chunk level structure and bitstream
+header information of WebP files. It can also check if the files are of valid
+WebP format.
+
+Usage: webpinfo [options] in_files
+Note: there could be multiple input files;
+      options must come before input files.
+Options:
+  -version ........... Print version number and exit.
+  -quiet ............. Do not show chunk parsing information.
+  -diag .............. Show parsing error diagnosis.
+  -summary ........... Show chunk stats summary.
+  -bitstream_info .... Parse bitstream header.
+
+Visualization tool:
+===================
+
+There's a little self-serve visualization tool called 'vwebp' under the
+examples/ directory. It uses OpenGL to open a simple drawing window and show
+a decoded WebP file. It's not yet integrated in the automake build system, but
+you can try to manually compile it using the recommendations below.
+
+Usage: vwebp in_file [options]
+
+Decodes the WebP image file and visualize it using OpenGL
+Options are:
+  -version ..... print version number and exit
+  -noicc ....... don't use the icc profile if present
+  -nofancy ..... don't use the fancy YUV420 upscaler
+  -nofilter .... disable in-loop filtering
+  -dither <int>  dithering strength (0..100), default=50
+  -noalphadither disable alpha plane dithering
+  -usebgcolor .. display background color
+  -mt .......... use multi-threading
+  -info ........ print info
+  -h ........... this help message
+
+Keyboard shortcuts:
+  'c' ................ toggle use of color profile
+  'b' ................ toggle background color display
+  'i' ................ overlay file information
+  'd' ................ disable blending & disposal (debug)
+  'q' / 'Q' / ESC .... quit
+
+Building:
+---------
+
+Prerequisites:
+1) OpenGL & OpenGL Utility Toolkit (GLUT)
+  Linux:
+    $ sudo apt-get install freeglut3-dev mesa-common-dev
+  Mac + Xcode:
+    - These libraries should be available in the OpenGL / GLUT frameworks.
+  Windows:
+    http://freeglut.sourceforge.net/index.php#download
+
+2) (Optional) qcms (Quick Color Management System)
+  i. Download qcms from Mozilla / Chromium:
+    http://hg.mozilla.org/mozilla-central/file/0e7639e3bdfb/gfx/qcms
+    http://src.chromium.org/viewvc/chrome/trunk/src/third_party/qcms
+  ii. Build and archive the source files as libqcms.a / qcms.lib
+  iii. Update makefile.unix / Makefile.vc
+    a) Define WEBP_HAVE_QCMS
+    b) Update include / library paths to reference the qcms directory.
+
+Build using makefile.unix / Makefile.vc:
+$ make -f makefile.unix examples/vwebp
+> nmake /f Makefile.vc CFG=release-static \
+    ../obj/x64/release-static/bin/vwebp.exe
+
+Animation creation tool:
+========================
+The utility 'img2webp' can turn a sequence of input images (PNG, JPEG, ...)
+into an animated WebP file. It offers fine control over duration, encoding
+modes, etc.
+
+Usage:
+
+  img2webp [file-level options] [image files...] [per-frame options...]
+
+File-level options (only used at the start of compression):
+ -min_size ............ minimize size
+ -loop <int> .......... loop count (default: 0, = infinite loop)
+ -kmax <int> .......... maximum number of frame between key-frames
+                        (0=only keyframes)
+ -kmin <int> .......... minimum number of frame between key-frames
+                        (0=disable key-frames altogether)
+ -mixed ............... use mixed lossy/lossless automatic mode
+ -v ................... verbose mode
+ -h ................... this help
+ -version ............. print version number and exit
+
+Per-frame options (only used for subsequent images input):
+ -d <int> ............. frame duration in ms (default: 100)
+ -lossless  ........... use lossless mode (default)
+ -lossy ... ........... use lossy mode
+ -q <float> ........... quality
+ -m <int> ............. method to use
+
+example: img2webp -loop 2 in0.png -lossy in1.jpg
+                  -d 80 in2.tiff -o out.webp
+
+Note: if a single file name is passed as the argument, the arguments will be
+tokenized from this file. The file name must not start with the character '-'.
+
+Animated GIF conversion:
+========================
+Animated GIF files can be converted to WebP files with animation using the
+gif2webp utility available under examples/. The files can then be viewed using
+vwebp.
+
+Usage:
+ gif2webp [options] gif_file -o webp_file
+Options:
+  -h / -help ............. this help
+  -lossy ................. encode image using lossy compression
+  -mixed ................. for each frame in the image, pick lossy
+                           or lossless compression heuristically
+  -q <float> ............. quality factor (0:small..100:big)
+  -m <int> ............... compression method (0=fast, 6=slowest)
+  -min_size .............. minimize output size (default:off)
+                           lossless compression by default; can be
+                           combined with -q, -m, -lossy or -mixed
+                           options
+  -kmin <int> ............ min distance between key frames
+  -kmax <int> ............ max distance between key frames
+  -f <int> ............... filter strength (0=off..100)
+  -metadata <string> ..... comma separated list of metadata to
+                           copy from the input to the output if present
+                           Valid values: all, none, icc, xmp (default)
+  -loop_compatibility .... use compatibility mode for Chrome
+                           version prior to M62 (inclusive)
+  -mt .................... use multi-threading if available
+
+  -version ............... print version number and exit
+  -v ..................... verbose
+  -quiet ................. don't print anything
+
+Building:
+---------
+With the libgif development files installed, gif2webp can be built using
+makefile.unix:
+$ make -f makefile.unix examples/gif2webp
+
+or using autoconf:
+$ ./configure --enable-everything
+$ make
+
+Comparison of animated images:
+==============================
+Test utility anim_diff under examples/ can be used to compare two animated
+images (each can be GIF or WebP).
+
+Usage: anim_diff <image1> <image2> [options]
+
+Options:
+  -dump_frames <folder> dump decoded frames in PAM format
+  -min_psnr <float> ... minimum per-frame PSNR
+  -raw_comparison ..... if this flag is not used, RGB is
+                        premultiplied before comparison
+  -max_diff <int> ..... maximum allowed difference per channel
+                        between corresponding pixels in subsequent
+                        frames
+  -h .................. this help
+  -version ............ print version number and exit
+
+Building:
+---------
+With the libgif development files and a C++ compiler installed, anim_diff can
+be built using makefile.unix:
+$ make -f makefile.unix examples/anim_diff
+
+or using autoconf:
+$ ./configure --enable-everything
+$ make
+
+Encoding API:
+=============
+
+The main encoding functions are available in the header src/webp/encode.h
+The ready-to-use ones are:
+size_t WebPEncodeRGB(const uint8_t* rgb, int width, int height, int stride,
+                     float quality_factor, uint8_t** output);
+size_t WebPEncodeBGR(const uint8_t* bgr, int width, int height, int stride,
+                     float quality_factor, uint8_t** output);
+size_t WebPEncodeRGBA(const uint8_t* rgba, int width, int height, int stride,
+                      float quality_factor, uint8_t** output);
+size_t WebPEncodeBGRA(const uint8_t* bgra, int width, int height, int stride,
+                      float quality_factor, uint8_t** output);
+
+They will convert raw RGB samples to WebP data. The only control supplied
+is the quality factor.
+
+There are some variants for using the lossless format:
+
+size_t WebPEncodeLosslessRGB(const uint8_t* rgb, int width, int height,
+                             int stride, uint8_t** output);
+size_t WebPEncodeLosslessBGR(const uint8_t* bgr, int width, int height,
+                             int stride, uint8_t** output);
+size_t WebPEncodeLosslessRGBA(const uint8_t* rgba, int width, int height,
+                              int stride, uint8_t** output);
+size_t WebPEncodeLosslessBGRA(const uint8_t* bgra, int width, int height,
+                              int stride, uint8_t** output);
+
+Of course in this case, no quality factor is needed since the compression
+occurs without loss of the input values, at the expense of larger output sizes.
+
+Advanced encoding API:
+----------------------
+
+A more advanced API is based on the WebPConfig and WebPPicture structures.
+
+WebPConfig contains the encoding settings and is not tied to a particular
+picture.
+WebPPicture contains input data, on which some WebPConfig will be used for
+compression.
+The encoding flow looks like:
+
+-------------------------------------- BEGIN PSEUDO EXAMPLE
+
+#include <webp/encode.h>
+
+  // Setup a config, starting from a preset and tuning some additional
+  // parameters
+  WebPConfig config;
+  if (!WebPConfigPreset(&config, WEBP_PRESET_PHOTO, quality_factor)) {
+    return 0;   // version error
+  }
+  // ... additional tuning
+  config.sns_strength = 90;
+  config.filter_sharpness = 6;
+  config_error = WebPValidateConfig(&config);  // not mandatory, but useful
+
+  // Setup the input data
+  WebPPicture pic;
+  if (!WebPPictureInit(&pic)) {
+    return 0;  // version error
+  }
+  pic.width = width;
+  pic.height = height;
+  // allocated picture of dimension width x height
+  if (!WebPPictureAlloc(&pic)) {
+    return 0;   // memory error
+  }
+  // at this point, 'pic' has been initialized as a container,
+  // and can receive the Y/U/V samples.
+  // Alternatively, one could use ready-made import functions like
+  // WebPPictureImportRGB(), which will take care of memory allocation.
+  // In any case, past this point, one will have to call
+  // WebPPictureFree(&pic) to reclaim memory.
+
+  // Set up a byte-output write method. WebPMemoryWriter, for instance.
+  WebPMemoryWriter wrt;
+  WebPMemoryWriterInit(&wrt);     // initialize 'wrt'
+
+  pic.writer = WebPMemoryWrite;
+  pic.custom_ptr = &wrt;
+
+  // Compress!
+  int ok = WebPEncode(&config, &pic);   // ok = 0 => error occurred!
+  WebPPictureFree(&pic);  // must be called independently of the 'ok' result.
+
+  // output data should have been handled by the writer at that point.
+  // -> compressed data is the memory buffer described by wrt.mem / wrt.size
+
+  // deallocate the memory used by compressed data
+  WebPMemoryWriterClear(&wrt);
+
+-------------------------------------- END PSEUDO EXAMPLE
+
+Decoding API:
+=============
+
+This is mainly just one function to call:
+
+#include "webp/decode.h"
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                       int* width, int* height);
+
+Please have a look at the file src/webp/decode.h for the details.
+There are variants for decoding in BGR/RGBA/ARGB/BGRA order, along with
+decoding to raw Y'CbCr samples. One can also decode the image directly into a
+pre-allocated buffer.
+
+To detect a WebP file and gather the picture's dimensions, the function:
+  int WebPGetInfo(const uint8_t* data, size_t data_size,
+                  int* width, int* height);
+is supplied. No decoding is involved when using it.
+
+Incremental decoding API:
+=========================
+
+In the case when data is being progressively transmitted, pictures can still
+be incrementally decoded using a slightly more complicated API. Decoder state
+is stored into an instance of the WebPIDecoder object. This object can be
+created with the purpose of decoding either RGB or Y'CbCr samples.
+For instance:
+
+  WebPDecBuffer buffer;
+  WebPInitDecBuffer(&buffer);
+  buffer.colorspace = MODE_BGR;
+  ...
+  WebPIDecoder* idec = WebPINewDecoder(&buffer);
+
+As data is made progressively available, this incremental-decoder object
+can be used to decode the picture further. There are two (mutually exclusive)
+ways to pass freshly arrived data:
+
+either by appending the fresh bytes:
+
+  WebPIAppend(idec, fresh_data, size_of_fresh_data);
+
+or by just mentioning the new size of the transmitted data:
+
+  WebPIUpdate(idec, buffer, size_of_transmitted_buffer);
+
+Note that 'buffer' can be modified between each call to WebPIUpdate, in
+particular when the buffer is resized to accommodate larger data.
+
+These functions will return the decoding status: either VP8_STATUS_SUSPENDED if
+decoding is not finished yet or VP8_STATUS_OK when decoding is done. Any other
+status is an error condition.
+
+The 'idec' object must always be released (even upon an error condition) by
+calling: WebPIDelete(idec).
+
+To retrieve partially decoded picture samples, one must use the corresponding
+method: WebPIDecGetRGB or WebPIDecGetYUVA.
+It will return the last displayable pixel row.
+
+Lastly, note that decoding can also be performed into a pre-allocated pixel
+buffer. This buffer must be passed when creating a WebPIDecoder, calling
+WebPINewRGB() or WebPINewYUVA().
+
+Please have a look at the src/webp/decode.h header for further details.
+
+Advanced Decoding API:
+======================
+
+WebP decoding supports an advanced API which provides on-the-fly cropping and
+rescaling, something of great usefulness on memory-constrained environments like
+mobile phones. Basically, the memory usage will scale with the output's size,
+not the input's, when one only needs a quick preview or a zoomed in portion of
+an otherwise too-large picture. Some CPU can be saved too, incidentally.
+
+-------------------------------------- BEGIN PSEUDO EXAMPLE
+     // A) Init a configuration object
+     WebPDecoderConfig config;
+     CHECK(WebPInitDecoderConfig(&config));
+
+     // B) optional: retrieve the bitstream's features.
+     CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
+
+     // C) Adjust 'config' options, if needed
+     config.options.no_fancy_upsampling = 1;
+     config.options.use_scaling = 1;
+     config.options.scaled_width = scaledWidth();
+     config.options.scaled_height = scaledHeight();
+     // etc.
+
+     // D) Specify 'config' output options, such as the output colorspace.
+     // Optionally the external image decode buffer can also be specified.
+     config.output.colorspace = MODE_BGRA;
+     // Optionally, the config.output can be pointed to an external buffer as
+     // well for decoding the image. This externally supplied memory buffer
+     // should be big enough to store the decoded picture.
+     config.output.u.RGBA.rgba = (uint8_t*) memory_buffer;
+     config.output.u.RGBA.stride = scanline_stride;
+     config.output.u.RGBA.size = total_size_of_the_memory_buffer;
+     config.output.is_external_memory = 1;
+
+     // E) Decode the WebP image. There are two variants w.r.t decoding image.
+     // The first one (E.1) decodes the full image and the second one (E.2) is
+     // used to incrementally decode the image using small input buffers.
+     // Any one of these steps can be used to decode the WebP image.
+
+     // E.1) Decode full image.
+     CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
+
+     // E.2) Decode image incrementally.
+     WebPIDecoder* const idec = WebPIDecode(NULL, NULL, &config);
+     CHECK(idec != NULL);
+     while (bytes_remaining > 0) {
+       VP8StatusCode status = WebPIAppend(idec, input, bytes_read);
+       if (status == VP8_STATUS_OK || status == VP8_STATUS_SUSPENDED) {
+         bytes_remaining -= bytes_read;
+       } else {
+         break;
+       }
+     }
+     WebPIDelete(idec);
+
+     // F) Decoded image is now in config.output (and config.output.u.RGBA).
+     // It can be saved, displayed or otherwise processed.
+
+     // G) Reclaim memory allocated in config's object. It's safe to call
+     // this function even if the memory is external and wasn't allocated
+     // by WebPDecode().
+     WebPFreeDecBuffer(&config.output);
+
+-------------------------------------- END PSEUDO EXAMPLE
+
+Bugs:
+=====
+
+Please report all bugs to the issue tracker:
+    https://bugs.chromium.org/p/webp
+Patches welcome! See this page to get started:
+    http://www.webmproject.org/code/contribute/submitting-patches/
+
+Discuss:
+========
+
+Email: webp-discuss@webmproject.org
+Web: http://groups.google.com/a/webmproject.org/group/webp-discuss
--- a/README.md
+++ b/README.md
@@ -1,53 +0,0 @@
-# WebP Codec
-
-```
-      __   __  ____  ____  ____
-     /  \\/  \/  _ \/  _ )/  _ \
-     \       /   __/  _  \   __/
-      \__\__/\____/\_____/__/ ____  ___
-            / _/ /    \    \ /  _ \/ _/
-           /  \_/   / /   \ \   __/  \__
-           \____/____/\_____/_____/____/v1.2.3
-```
-
-WebP codec is a library to encode and decode images in WebP format. This package
-contains the library that can be used in other programs to add WebP support, as
-well as the command line tools 'cwebp' and 'dwebp' to compress and decompress
-images respectively.
-
-See https://developers.google.com/speed/webp for details on the image format.
-
-The latest source tree is available at
-https://chromium.googlesource.com/webm/libwebp
-
-It is released under the same license as the WebM project. See
-https://www.webmproject.org/license/software/ or the "COPYING" file for details.
-An additional intellectual property rights grant can be found in the file
-PATENTS.
-
-## Building
-
-See the [building documentation](doc/building.md).
-
-## Encoding and Decoding Tools
-
-The examples/ directory contains tools to encode and decode images and
-animations, view information about WebP images, and more. See the
-[tools documentation](doc/tools.md).
-
-## APIs
-
-See the [APIs documentation](doc/api.md), and API usage examples in the
-`examples/` directory.
-
-## Bugs
-
-Please report all bugs to the issue tracker: https://bugs.chromium.org/p/webp
-
-Patches welcome! See [how to contribute](CONTRIBUTING.md).
-
-## Discuss
-
-Email: webp-discuss@webmproject.org
-
-Web: https://groups.google.com/a/webmproject.org/group/webp-discuss
--- a/README.mux
+++ b/README.mux
@@ -0,0 +1,258 @@
+          __   __  ____  ____  ____  __ __  _     __ __
+         /  \\/  \/  _ \/  _ \/  _ \/  \  \/ \___/_ / _\
+         \       /   __/  _  \   __/      /  /  (_/  /__
+          \__\__/\_____/_____/__/  \__//_/\_____/__/___/v1.2.1
+
+
+Description:
+============
+
+WebPMux: set of two libraries 'Mux' and 'Demux' for creation, extraction and
+manipulation of an extended format WebP file, which can have features like
+color profile, metadata and animation. Reference command-line tools 'webpmux'
+and 'vwebp' as well as the WebP container specification
+'doc/webp-container-spec.txt' are also provided in this package.
+
+WebP Mux tool:
+==============
+
+The examples/ directory contains a tool (webpmux) for manipulating WebP
+files. The webpmux tool can be used to create an extended format WebP file and
+also to extract or strip relevant data from such a file.
+
+A list of options is available using the -help command line flag:
+
+> webpmux -help
+Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT
+       webpmux -set SET_OPTIONS INPUT -o OUTPUT
+       webpmux -duration DURATION_OPTIONS [-duration ...]
+               INPUT -o OUTPUT
+       webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT
+       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]
+               [-bgcolor BACKGROUND_COLOR] -o OUTPUT
+       webpmux -info INPUT
+       webpmux [-h|-help]
+       webpmux -version
+       webpmux argument_file_name
+
+GET_OPTIONS:
+ Extract relevant data:
+   icc       get ICC profile
+   exif      get EXIF metadata
+   xmp       get XMP metadata
+   frame n   get nth frame
+
+SET_OPTIONS:
+ Set color profile/metadata/parameters:
+   loop LOOP_COUNT            set the loop count
+   bgcolor BACKGROUND_COLOR   set the animation background color
+   icc  file.icc              set ICC profile
+   exif file.exif             set EXIF metadata
+   xmp  file.xmp              set XMP metadata
+   where:    'file.icc' contains the ICC profile to be set,
+             'file.exif' contains the EXIF metadata to be set
+             'file.xmp' contains the XMP metadata to be set
+
+DURATION_OPTIONS:
+ Set duration of selected frames:
+   duration            set duration for each frames
+   duration,frame      set duration of a particular frame
+   duration,start,end  set duration of frames in the
+                        interval [start,end])
+   where: 'duration' is the duration in milliseconds
+          'start' is the start frame index
+          'end' is the inclusive end frame index
+           The special 'end' value '0' means: last frame.
+
+STRIP_OPTIONS:
+ Strip color profile/metadata:
+   icc       strip ICC profile
+   exif      strip EXIF metadata
+   xmp       strip XMP metadata
+
+FRAME_OPTIONS(i):
+ Create animation:
+   file_i +di+[xi+yi[+mi[bi]]]
+   where:    'file_i' is the i'th animation frame (WebP format),
+             'di' is the pause duration before next frame,
+             'xi','yi' specify the image offset for this frame,
+             'mi' is the dispose method for this frame (0 or 1),
+             'bi' is the blending method for this frame (+b or -b)
+
+LOOP_COUNT:
+ Number of times to repeat the animation.
+ Valid range is 0 to 65535 [Default: 0 (infinite)].
+
+BACKGROUND_COLOR:
+ Background color of the canvas.
+  A,R,G,B
+  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 specifying
+            the Alpha, Red, Green and Blue component values respectively
+            [Default: 255,255,255,255]
+
+INPUT & OUTPUT are in WebP format.
+
+Note: The nature of EXIF, XMP and ICC data is not checked and is assumed to be
+valid.
+
+Note: if a single file name is passed as the argument, the arguments will be
+tokenized from this file. The file name must not start with the character '-'.
+
+Visualization tool:
+===================
+
+The examples/ directory also contains a tool (vwebp) for viewing WebP files.
+It decodes the image and visualizes it using OpenGL. See the libwebp README
+for details on building and running this program.
+
+Mux API:
+========
+The Mux API contains methods for adding data to and reading data from WebP
+files. This API currently supports XMP/EXIF metadata, ICC profile and animation.
+Other features may be added in subsequent releases.
+
+Example#1 (pseudo code): Creating a WebPMux object with image data, color
+profile and XMP metadata.
+
+  int copy_data = 0;
+  WebPMux* mux = WebPMuxNew();
+  // ... (Prepare image data).
+  WebPMuxSetImage(mux, &image, copy_data);
+  // ... (Prepare ICC profile data).
+  WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
+  // ... (Prepare XMP metadata).
+  WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
+  // Get data from mux in WebP RIFF format.
+  WebPMuxAssemble(mux, &output_data);
+  WebPMuxDelete(mux);
+  // ... (Consume output_data; e.g. write output_data.bytes to file).
+  WebPDataClear(&output_data);
+
+
+Example#2 (pseudo code): Get image and color profile data from a WebP file.
+
+  int copy_data = 0;
+  // ... (Read data from file).
+  WebPMux* mux = WebPMuxCreate(&data, copy_data);
+  WebPMuxGetFrame(mux, 1, &image);
+  // ... (Consume image; e.g. call WebPDecode() to decode the data).
+  WebPMuxGetChunk(mux, "ICCP", &icc_profile);
+  // ... (Consume icc_profile).
+  WebPMuxDelete(mux);
+  free(data);
+
+
+For a detailed Mux API reference, please refer to the header file
+(src/webp/mux.h).
+
+Demux API:
+==========
+The Demux API enables extraction of images and extended format data from
+WebP files. This API currently supports reading of XMP/EXIF metadata, ICC
+profile and animated images. Other features may be added in subsequent
+releases.
+
+Code example: Demuxing WebP data to extract all the frames, ICC profile
+and EXIF/XMP metadata.
+
+  WebPDemuxer* demux = WebPDemux(&webp_data);
+  uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
+  uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
+  // ... (Get information about the features present in the WebP file).
+  uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
+
+  // ... (Iterate over all frames).
+  WebPIterator iter;
+  if (WebPDemuxGetFrame(demux, 1, &iter)) {
+    do {
+      // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
+      // ... and get other frame properties like width, height, offsets etc.
+      // ... see 'struct WebPIterator' below for more info).
+    } while (WebPDemuxNextFrame(&iter));
+    WebPDemuxReleaseIterator(&iter);
+  }
+
+  // ... (Extract metadata).
+  WebPChunkIterator chunk_iter;
+  if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
+  // ... (Consume the ICC profile in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
+  // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
+  // ... (Consume the XMP metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  WebPDemuxDelete(demux);
+
+
+For a detailed Demux API reference, please refer to the header file
+(src/webp/demux.h).
+
+AnimEncoder API:
+================
+The AnimEncoder API can be used to create animated WebP images.
+
+Code example:
+
+  WebPAnimEncoderOptions enc_options;
+  WebPAnimEncoderOptionsInit(&enc_options);
+  // ... (Tune 'enc_options' as needed).
+  WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
+  while(<there are more frames>) {
+    WebPConfig config;
+    WebPConfigInit(&config);
+    // ... (Tune 'config' as needed).
+    WebPAnimEncoderAdd(enc, frame, duration, &config);
+  }
+  WebPAnimEncoderAssemble(enc, webp_data);
+  WebPAnimEncoderDelete(enc);
+  // ... (Write the 'webp_data' to a file, or re-mux it further).
+
+
+For a detailed AnimEncoder API reference, please refer to the header file
+(src/webp/mux.h).
+
+AnimDecoder API:
+================
+This AnimDecoder API allows decoding (possibly) animated WebP images.
+
+Code Example:
+
+  WebPAnimDecoderOptions dec_options;
+  WebPAnimDecoderOptionsInit(&dec_options);
+  // Tune 'dec_options' as needed.
+  WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
+  WebPAnimInfo anim_info;
+  WebPAnimDecoderGetInfo(dec, &anim_info);
+  for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
+    while (WebPAnimDecoderHasMoreFrames(dec)) {
+      uint8_t* buf;
+      int timestamp;
+      WebPAnimDecoderGetNext(dec, &buf, &timestamp);
+      // ... (Render 'buf' based on 'timestamp').
+      // ... (Do NOT free 'buf', as it is owned by 'dec').
+    }
+    WebPAnimDecoderReset(dec);
+  }
+  const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
+  // ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
+  WebPAnimDecoderDelete(dec);
+
+For a detailed AnimDecoder API reference, please refer to the header file
+(src/webp/demux.h).
+
+
+Bugs:
+=====
+
+Please report all bugs to the issue tracker:
+    https://bugs.chromium.org/p/webp
+Patches welcome! See this page to get started:
+    http://www.webmproject.org/code/contribute/submitting-patches/
+
+Discuss:
+========
+
+Email: webp-discuss@webmproject.org
+Web: http://groups.google.com/a/webmproject.org/group/webp-discuss
--- a/README.webp_js
+++ b/README.webp_js
@@ -0,0 +1,75 @@
+     __   __ ____ ____ ____     __  ____
+    /  \\/  \  _ \  _ \  _ \   (__)/  __\
+    \       /  __/ _  \  __/   _)  \_   \
+     \__\__/_____/____/_/     /____/____/
+
+Description:
+============
+
+This file describes the compilation of libwebp into a JavaScript decoder
+using Emscripten and CMake.
+
+ - install the Emscripten SDK following the procedure described at:
+   https://emscripten.org/docs/getting_started/downloads.html#installation-instructions-using-the-emsdk-recommended
+   After installation, you should have some environment variables set to the
+   location of the SDK. In particular, $EMSDK should point to the
+   top-level directory containing Emscripten tools.
+
+ - configure the project 'WEBP_JS' with CMake using:
+
+ cd webp_js && \
+ emcmake cmake -DWEBP_BUILD_WEBP_JS=ON \
+       ../
+
+ - compile webp.js using 'emmake make'.
+
+ - that's it! Upon completion, you should have the webp.js and
+   webp.wasm files generated.
+
+The callable JavaScript function is WebPToSDL(), which decodes a raw WebP
+bitstream into a canvas. See webp_js/index.html for a simple usage sample
+(see below for instructions).
+
+Demo HTML page:
+===============
+
+   The HTML page webp_js/index.html requires an HTTP server to serve the WebP
+   image example. It's easy to just use Python for that.
+
+cd webp_js && python -m SimpleHTTPServer 8080
+
+and then navigate to http://localhost:8080 in your favorite browser.
+
+
+Web-Assembly (WASM) version:
+============================
+
+  CMakeLists.txt is configured to build the WASM version when using
+  the option WEBP_BUILD_WEBP_JS=ON. The compilation step will assemble
+  the files 'webp_wasm.js', 'webp_wasm.wasm' in the webp_js/ directory.
+  See webp_js/index_wasm.html for a simple demo page using the WASM version
+  of the library.
+
+  You will need a fairly recent version of Emscripten (at least 2.0.18,
+  latest-upstream is recommended) and of your WASM-enabled browser to run this
+  version.
+
+Caveat:
+=======
+
+  - First decoding using the library is usually slower, due to just-in-time
+    compilation.
+
+  - Some versions of llvm produce the following compile error when SSE2 is
+    enabled.
+
+"Unsupported:   %516 = bitcast <8 x i16> %481 to i128
+ LLVM ERROR: BitCast Instruction not yet supported for integer types larger than 64 bits"
+
+    The corresponding Emscripten bug is at:
+    https://github.com/kripken/emscripten/issues/3788
+
+    Therefore, SSE2 optimization is currently disabled in CMakeLists.txt.
+
+  - If WEBP_ENABLE_SIMD is set to 1 the JavaScript version (webp.js) will be
+    disabled as wasm2js does not support SIMD.
--- a/build.gradle
+++ b/build.gradle
@@ -105,13 +105,6 @@ model {
      sources {
        c {
          source {
-            srcDir "sharpyuv"
-            include "sharpyuv.c"
-            include "sharpyuv_csp.c"
-            include "sharpyuv_dsp.c"
-            include "sharpyuv_gamma.c"
-            include "sharpyuv_neon.c"
-            include "sharpyuv_sse2.c"
            srcDir "src/dec"
            include "alpha_dec.c"
            include "buffer_dec.c"
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -108,6 +108,9 @@
 /* Set to 1 if JPEG library is installed */
 #cmakedefine WEBP_HAVE_JPEG 1

+/* Set to 1 if Wasm SIMD is supported */
+#cmakedefine WEBP_HAVE_WASM_SIMD
+
 /* Set to 1 if NEON is supported */
 #cmakedefine WEBP_HAVE_NEON

--- a/cmake/cpu.cmake
+++ b/cmake/cpu.cmake
@@ -36,9 +36,9 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
 endfunction()

 # those are included in the names of WEBP_USE_* in c++ code.
-set(WEBP_SIMD_FLAGS "SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
+set(WEBP_SIMD_FLAGS "WASM_SIMD;SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
 set(WEBP_SIMD_FILE_EXTENSIONS
-    "_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
+    "_wasm.c;_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
 if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
  # With at least Visual Studio 12 (2013)+ /arch is not necessary to build SSE2
  # or SSE4 code unless a lesser /arch is forced. MSVC does not have a SSE4
@@ -53,9 +53,9 @@ if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
  set(SIMD_DISABLE_FLAGS)
 else()
  set(SIMD_ENABLE_FLAGS
-      "-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
+      "-msimd128;-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
  set(SIMD_DISABLE_FLAGS
-      "-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
+      "-mno-simd128;-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
 endif()

 set(WEBP_SIMD_FILES_TO_NOT_INCLUDE)
@@ -77,7 +77,12 @@ math(EXPR WEBP_SIMD_FLAGS_RANGE "${WEBP_SIMD_FLAGS_LENGTH} - 1")
 foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
  # With Emscripten 2.0.9 -msimd128 -mfpu=neon will enable NEON, but the
  # source will fail to compile.
-  if(EMSCRIPTEN AND ${I_SIMD} GREATER_EQUAL 2)
+  if(EMSCRIPTEN AND ${I_SIMD} GREATER_EQUAL 5)
+    break()
+  endif()
+  # Emscripten supports SSE via compat headers. If WEBP_ENABLE_WASM_SIMD is
+  # specified, skip testing those (because the checks would succeed).
+  if (EMSCRIPTEN AND ${I_SIMD} GREATER_EQUAL 1 AND ${WEBP_ENABLE_WASM_SIMD})
    break()
  endif()

@@ -91,6 +96,7 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
  webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
  if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
    list(GET SIMD_ENABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
+    # This enables using Emscripten's SSE compatibility headers.
    if(EMSCRIPTEN)
      set(SIMD_COMPILE_FLAG "-msimd128 ${SIMD_COMPILE_FLAG}")
    endif()
--- a/cmake/deps.cmake
+++ b/cmake/deps.cmake
@@ -45,7 +45,7 @@ if(WEBP_USE_THREAD)
        }
      " FLAG_HAVE_PTHREAD_PRIO_INHERIT)
    set(HAVE_PTHREAD_PRIO_INHERIT ${FLAG_HAVE_PTHREAD_PRIO_INHERIT})
-    list(APPEND WEBP_DEP_LIBRARIES Threads::Threads)
+    list(APPEND WEBP_DEP_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
  endif()
  set(WEBP_USE_THREAD ${Threads_FOUND})
 endif()
@@ -74,11 +74,6 @@ endif()
 set(WEBP_DEP_IMG_LIBRARIES)
 set(WEBP_DEP_IMG_INCLUDE_DIRS)
 foreach(I_LIB PNG JPEG TIFF)
-  # Disable tiff when compiling in static mode as it is failing on Ubuntu.
-  if(WEBP_LINK_STATIC AND ${I_LIB} STREQUAL "TIFF")
-    message("TIFF is disabled when statically linking.")
-    continue()
-  endif()
  find_package(${I_LIB})
  set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
  if(${I_LIB}_FOUND)
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,6 @@
-AC_INIT([libwebp], [1.2.3],
+AC_INIT([libwebp], [1.2.1],
        [https://bugs.chromium.org/p/webp],,
-        [https://developers.google.com/speed/webp])
+        [http://developers.google.com/speed/webp])
 AC_CANONICAL_HOST
 AC_PREREQ([2.60])
 AM_INIT_AUTOMAKE([-Wall foreign subdir-objects])
@@ -9,8 +9,7 @@ dnl === automake >= 1.12 requires this for 'unusual archivers' support.
 dnl === it must occur before LT_INIT (AC_PROG_LIBTOOL).
 m4_ifdef([AM_PROG_AR], [AM_PROG_AR])

-dnl === AC_PROG_LIBTOOL is deprecated.
-m4_ifdef([LT_INIT], [LT_INIT], [AC_PROG_LIBTOOL])
+AC_PROG_LIBTOOL
 AC_PROG_SED
 AM_PROG_CC_C_O

@@ -28,8 +27,7 @@ AC_ARG_ENABLE([everything],
              AS_HELP_STRING([--enable-everything],
                             [Enable all optional targets. These can still be
                              disabled with --disable-target]),
-              [SET_IF_UNSET([enable_libsharpyuv], [$enableval])
-               SET_IF_UNSET([enable_libwebpdecoder], [$enableval])
+              [SET_IF_UNSET([enable_libwebpdecoder], [$enableval])
               SET_IF_UNSET([enable_libwebpdemux], [$enableval])
               SET_IF_UNSET([enable_libwebpextras], [$enableval])
               SET_IF_UNSET([enable_libwebpmux], [$enableval])])
@@ -753,7 +751,6 @@ AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_HEADERS([src/webp/config.h])
 AC_CONFIG_FILES([Makefile src/Makefile man/Makefile \
                 examples/Makefile extras/Makefile imageio/Makefile \
-                 sharpyuv/Makefile \
                 src/dec/Makefile src/enc/Makefile src/dsp/Makefile \
                 src/demux/Makefile src/mux/Makefile \
                 src/utils/Makefile \
--- a/doc/README
+++ b/doc/README
@@ -0,0 +1,29 @@
+
+Generate libwebp Container Spec Docs from Text Source
+=====================================================
+
+HTML generation requires kramdown [1], easily installed as a
+rubygem [2].  Rubygems installation should satisfy dependencies
+automatically.
+
+[1]: http://kramdown.rubyforge.org/
+[2]: http://rubygems.org/
+
+HTML generation can then be done from the project root:
+
+$ kramdown doc/webp-container-spec.txt --template doc/template.html > \
+  doc/output/webp-container-spec.html
+
+kramdown can optionally syntax highlight code blocks, using CodeRay [3],
+a dependency of kramdown that rubygems will install automatically.  The
+following will apply inline CSS styling; an external stylesheet is not
+needed.
+
+$ kramdown doc/webp-lossless-bitstream-spec.txt --template \
+  doc/template.html --coderay-css style --coderay-line-numbers ' ' \
+  --coderay-default-lang c > \
+  doc/output/webp-lossless-bitstream-spec.html
+
+Optimally, use kramdown 0.13.7 or newer if syntax highlighting is desired.
+
+[3]: http://coderay.rubychan.de/
--- a/doc/api.md
+++ b/doc/api.md
@@ -1,385 +0,0 @@
-# WebP APIs
-
-## Encoding API
-
-The main encoding functions are available in the header src/webp/encode.h
-
-The ready-to-use ones are:
-
-```c
-size_t WebPEncodeRGB(const uint8_t* rgb, int width, int height, int stride,
-                     float quality_factor, uint8_t** output);
-size_t WebPEncodeBGR(const uint8_t* bgr, int width, int height, int stride,
-                     float quality_factor, uint8_t** output);
-size_t WebPEncodeRGBA(const uint8_t* rgba, int width, int height, int stride,
-                      float quality_factor, uint8_t** output);
-size_t WebPEncodeBGRA(const uint8_t* bgra, int width, int height, int stride,
-                      float quality_factor, uint8_t** output);
-```
-
-They will convert raw RGB samples to a WebP data. The only control supplied is
-the quality factor.
-
-There are some variants for using the lossless format:
-
-```c
-size_t WebPEncodeLosslessRGB(const uint8_t* rgb, int width, int height,
-                             int stride, uint8_t** output);
-size_t WebPEncodeLosslessBGR(const uint8_t* bgr, int width, int height,
-                             int stride, uint8_t** output);
-size_t WebPEncodeLosslessRGBA(const uint8_t* rgba, int width, int height,
-                              int stride, uint8_t** output);
-size_t WebPEncodeLosslessBGRA(const uint8_t* bgra, int width, int height,
-                              int stride, uint8_t** output);
-```
-
-Of course in this case, no quality factor is needed since the compression occurs
-without loss of the input values, at the expense of larger output sizes.
-
-### Advanced encoding API
-
-A more advanced API is based on the WebPConfig and WebPPicture structures.
-
-WebPConfig contains the encoding settings and is not tied to a particular
-picture. WebPPicture contains input data, on which some WebPConfig will be used
-for compression. The encoding flow looks like:
-
-```c
-#include <webp/encode.h>
-
-// Setup a config, starting form a preset and tuning some additional
-// parameters
-WebPConfig config;
-if (!WebPConfigPreset(&config, WEBP_PRESET_PHOTO, quality_factor)) {
-  return 0;   // version error
-}
-// ... additional tuning
-config.sns_strength = 90;
-config.filter_sharpness = 6;
-config_error = WebPValidateConfig(&config);  // not mandatory, but useful
-
-// Setup the input data
-WebPPicture pic;
-if (!WebPPictureInit(&pic)) {
-  return 0;  // version error
-}
-pic.width = width;
-pic.height = height;
-// allocated picture of dimension width x height
-if (!WebPPictureAlloc(&pic)) {
-  return 0;   // memory error
-}
-// at this point, 'pic' has been initialized as a container,
-// and can receive the Y/U/V samples.
-// Alternatively, one could use ready-made import functions like
-// WebPPictureImportRGB(), which will take care of memory allocation.
-// In any case, past this point, one will have to call
-// WebPPictureFree(&pic) to reclaim memory.
-
-// Set up a byte-output write method. WebPMemoryWriter, for instance.
-WebPMemoryWriter wrt;
-WebPMemoryWriterInit(&wrt);     // initialize 'wrt'
-
-pic.writer = MyFileWriter;
-pic.custom_ptr = my_opaque_structure_to_make_MyFileWriter_work;
-
-// Compress!
-int ok = WebPEncode(&config, &pic);   // ok = 0 => error occurred!
-WebPPictureFree(&pic);  // must be called independently of the 'ok' result.
-
-// output data should have been handled by the writer at that point.
-// -> compressed data is the memory buffer described by wrt.mem / wrt.size
-
-// deallocate the memory used by compressed data
-WebPMemoryWriterClear(&wrt);
-```
-
-## Decoding API
-
-This is mainly just one function to call:
-
-```c
-#include "webp/decode.h"
-uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
-                       int* width, int* height);
-```
-
-Please have a look at the file src/webp/decode.h for the details. There are
-variants for decoding in BGR/RGBA/ARGB/BGRA order, along with decoding to raw
-Y'CbCr samples. One can also decode the image directly into a pre-allocated
-buffer.
-
-To detect a WebP file and gather the picture's dimensions, the function:
-
-```c
-int WebPGetInfo(const uint8_t* data, size_t data_size,
-                int* width, int* height);
-```
-
-is supplied. No decoding is involved when using it.
-
-### Incremental decoding API
-
-In the case when data is being progressively transmitted, pictures can still be
-incrementally decoded using a slightly more complicated API. Decoder state is
-stored into an instance of the WebPIDecoder object. This object can be created
-with the purpose of decoding either RGB or Y'CbCr samples. For instance:
-
-```c
-WebPDecBuffer buffer;
-WebPInitDecBuffer(&buffer);
-buffer.colorspace = MODE_BGR;
-...
-WebPIDecoder* idec = WebPINewDecoder(&buffer);
-```
-
-As data is made progressively available, this incremental-decoder object can be
-used to decode the picture further. There are two (mutually exclusive) ways to
-pass freshly arrived data:
-
-either by appending the fresh bytes:
-
-```c
-WebPIAppend(idec, fresh_data, size_of_fresh_data);
-```
-
-or by just mentioning the new size of the transmitted data:
-
-```c
-WebPIUpdate(idec, buffer, size_of_transmitted_buffer);
-```
-
-Note that 'buffer' can be modified between each call to WebPIUpdate, in
-particular when the buffer is resized to accommodate larger data.
-
-These functions will return the decoding status: either VP8_STATUS_SUSPENDED if
-decoding is not finished yet or VP8_STATUS_OK when decoding is done. Any other
-status is an error condition.
-
-The 'idec' object must always be released (even upon an error condition) by
-calling: WebPDelete(idec).
-
-To retrieve partially decoded picture samples, one must use the corresponding
-method: WebPIDecGetRGB or WebPIDecGetYUVA. It will return the last displayable
-pixel row.
-
-Lastly, note that decoding can also be performed into a pre-allocated pixel
-buffer. This buffer must be passed when creating a WebPIDecoder, calling
-WebPINewRGB() or WebPINewYUVA().
-
-Please have a look at the src/webp/decode.h header for further details.
-
-### Advanced Decoding API
-
-WebP decoding supports an advanced API which provides on-the-fly cropping and
-rescaling, something of great usefulness on memory-constrained environments like
-mobile phones. Basically, the memory usage will scale with the output's size,
-not the input's, when one only needs a quick preview or a zoomed in portion of
-an otherwise too-large picture. Some CPU can be saved too, incidentally.
-
-```c
-// A) Init a configuration object
-WebPDecoderConfig config;
-CHECK(WebPInitDecoderConfig(&config));
-
-// B) optional: retrieve the bitstream's features.
-CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
-
-// C) Adjust 'config' options, if needed
-config.options.no_fancy_upsampling = 1;
-config.options.use_scaling = 1;
-config.options.scaled_width = scaledWidth();
-config.options.scaled_height = scaledHeight();
-// etc.
-
-// D) Specify 'config' output options for specifying output colorspace.
-// Optionally the external image decode buffer can also be specified.
-config.output.colorspace = MODE_BGRA;
-// Optionally, the config.output can be pointed to an external buffer as
-// well for decoding the image. This externally supplied memory buffer
-// should be big enough to store the decoded picture.
-config.output.u.RGBA.rgba = (uint8_t*) memory_buffer;
-config.output.u.RGBA.stride = scanline_stride;
-config.output.u.RGBA.size = total_size_of_the_memory_buffer;
-config.output.is_external_memory = 1;
-
-// E) Decode the WebP image. There are two variants w.r.t decoding image.
-// The first one (E.1) decodes the full image and the second one (E.2) is
-// used to incrementally decode the image using small input buffers.
-// Any one of these steps can be used to decode the WebP image.
-
-// E.1) Decode full image.
-CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
-
-// E.2) Decode image incrementally.
-WebPIDecoder* const idec = WebPIDecode(NULL, NULL, &config);
-CHECK(idec != NULL);
-while (bytes_remaining > 0) {
-  VP8StatusCode status = WebPIAppend(idec, input, bytes_read);
-  if (status == VP8_STATUS_OK || status == VP8_STATUS_SUSPENDED) {
-    bytes_remaining -= bytes_read;
-  } else {
-    break;
-  }
-}
-WebPIDelete(idec);
-
-// F) Decoded image is now in config.output (and config.output.u.RGBA).
-// It can be saved, displayed or otherwise processed.
-
-// G) Reclaim memory allocated in config's object. It's safe to call
-// this function even if the memory is external and wasn't allocated
-// by WebPDecode().
-WebPFreeDecBuffer(&config.output);
-```
-
-## Webp Mux
-
-WebPMux is a set of two libraries 'Mux' and 'Demux' for creation, extraction and
-manipulation of an extended format WebP file, which can have features like color
-profile, metadata and animation. Reference command-line tools `webpmux` and
-`vwebp` as well as the WebP container specification
-'doc/webp-container-spec.txt' are also provided in this package, see the
-[tools documentation](tools.md).
-
-### Mux API
-
-The Mux API contains methods for adding data to and reading data from WebP
-files. This API currently supports XMP/EXIF metadata, ICC profile and animation.
-Other features may be added in subsequent releases.
-
-Example#1 (pseudo code): Creating a WebPMux object with image data, color
-profile and XMP metadata.
-
-```c
-int copy_data = 0;
-WebPMux* mux = WebPMuxNew();
-// ... (Prepare image data).
-WebPMuxSetImage(mux, &image, copy_data);
-// ... (Prepare ICC profile data).
-WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
-// ... (Prepare XMP metadata).
-WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
-// Get data from mux in WebP RIFF format.
-WebPMuxAssemble(mux, &output_data);
-WebPMuxDelete(mux);
-// ... (Consume output_data; e.g. write output_data.bytes to file).
-WebPDataClear(&output_data);
-```
-
-Example#2 (pseudo code): Get image and color profile data from a WebP file.
-
-```c
-int copy_data = 0;
-// ... (Read data from file).
-WebPMux* mux = WebPMuxCreate(&data, copy_data);
-WebPMuxGetFrame(mux, 1, &image);
-// ... (Consume image; e.g. call WebPDecode() to decode the data).
-WebPMuxGetChunk(mux, "ICCP", &icc_profile);
-// ... (Consume icc_profile).
-WebPMuxDelete(mux);
-free(data);
-```
-
-For a detailed Mux API reference, please refer to the header file
-(src/webp/mux.h).
-
-### Demux API
-
-The Demux API enables extraction of images and extended format data from WebP
-files. This API currently supports reading of XMP/EXIF metadata, ICC profile and
-animated images. Other features may be added in subsequent releases.
-
-Code example: Demuxing WebP data to extract all the frames, ICC profile and
-EXIF/XMP metadata.
-
-```c
-WebPDemuxer* demux = WebPDemux(&webp_data);
-uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
-uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
-// ... (Get information about the features present in the WebP file).
-uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
-
-// ... (Iterate over all frames).
-WebPIterator iter;
-if (WebPDemuxGetFrame(demux, 1, &iter)) {
-  do {
-    // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
-    // ... and get other frame properties like width, height, offsets etc.
-    // ... see 'struct WebPIterator' below for more info).
-  } while (WebPDemuxNextFrame(&iter));
-  WebPDemuxReleaseIterator(&iter);
-}
-
-// ... (Extract metadata).
-WebPChunkIterator chunk_iter;
-if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
-// ... (Consume the ICC profile in 'chunk_iter.chunk').
-WebPDemuxReleaseChunkIterator(&chunk_iter);
-if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
-// ... (Consume the EXIF metadata in 'chunk_iter.chunk').
-WebPDemuxReleaseChunkIterator(&chunk_iter);
-if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
-// ... (Consume the XMP metadata in 'chunk_iter.chunk').
-WebPDemuxReleaseChunkIterator(&chunk_iter);
-WebPDemuxDelete(demux);
-```
-
-For a detailed Demux API reference, please refer to the header file
-(src/webp/demux.h).
-
-## AnimEncoder API
-
-The AnimEncoder API can be used to create animated WebP images.
-
-Code example:
-
-```c
-WebPAnimEncoderOptions enc_options;
-WebPAnimEncoderOptionsInit(&enc_options);
-// ... (Tune 'enc_options' as needed).
-WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
-while(<there are more frames>) {
-  WebPConfig config;
-  WebPConfigInit(&config);
-  // ... (Tune 'config' as needed).
-  WebPAnimEncoderAdd(enc, frame, duration, &config);
-}
-WebPAnimEncoderAssemble(enc, webp_data);
-WebPAnimEncoderDelete(enc);
-// ... (Write the 'webp_data' to a file, or re-mux it further).
-```
-
-For a detailed AnimEncoder API reference, please refer to the header file
-(src/webp/mux.h).
-
-## AnimDecoder API
-
-This AnimDecoder API allows decoding (possibly) animated WebP images.
-
-Code Example:
-
-```c
-WebPAnimDecoderOptions dec_options;
-WebPAnimDecoderOptionsInit(&dec_options);
-// Tune 'dec_options' as needed.
-WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
-WebPAnimInfo anim_info;
-WebPAnimDecoderGetInfo(dec, &anim_info);
-for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
-  while (WebPAnimDecoderHasMoreFrames(dec)) {
-    uint8_t* buf;
-    int timestamp;
-    WebPAnimDecoderGetNext(dec, &buf, &timestamp);
-    // ... (Render 'buf' based on 'timestamp').
-    // ... (Do NOT free 'buf', as it is owned by 'dec').
-  }
-  WebPAnimDecoderReset(dec);
-}
-const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
-// ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
-WebPAnimDecoderDelete(dec);
-```
-
-For a detailed AnimDecoder API reference, please refer to the header file
-(src/webp/demux.h).
--- a/doc/building.md
+++ b/doc/building.md
@@ -1,213 +0,0 @@
-# Building
-
-## Windows build
-
-By running:
-
-```batch
-nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output
-```
-
-the directory `output\release-static\(x64|x86)\bin` will contain the tools
-cwebp.exe and dwebp.exe. The directory `output\release-static\(x64|x86)\lib`
-will contain the libwebp static library. The target architecture (x86/x64) is
-detected by Makefile.vc from the Visual Studio compiler (cl.exe) available in
-the system path.
-
-## Unix build using makefile.unix
-
-On platforms with GNU tools installed (gcc and make), running
-
-```shell
-make -f makefile.unix
-```
-
-will build the binaries examples/cwebp and examples/dwebp, along with the static
-library src/libwebp.a. No system-wide installation is supplied, as this is a
-simple alternative to the full installation system based on the autoconf tools
-(see below). Please refer to makefile.unix for additional details and
-customizations.
-
-## Using autoconf tools
-
-Prerequisites: a compiler (e.g., gcc), make, autoconf, automake, libtool.
-
-On a Debian-like system the following should install everything you need for a
-minimal build:
-
-```shell
-$ sudo apt-get install gcc make autoconf automake libtool
-```
-
-When building from git sources, you will need to run autogen.sh to generate the
-configure script.
-
-```shell
-./configure
-make
-make install
-```
-
-should be all you need to have the following files
-
-```
-/usr/local/include/webp/decode.h
-/usr/local/include/webp/encode.h
-/usr/local/include/webp/types.h
-/usr/local/lib/libwebp.*
-/usr/local/bin/cwebp
-/usr/local/bin/dwebp
-```
-
-installed.
-
-Note: A decode-only library, libwebpdecoder, is available using the
-`--enable-libwebpdecoder` flag. The encode library is built separately and can
-be installed independently using a minor modification in the corresponding
-Makefile.am configure files (see comments there). See `./configure --help` for
-more options.
-
-## Building for MIPS Linux
-
-MIPS Linux toolchain stable available releases can be found at:
-https://community.imgtec.com/developers/mips/tools/codescape-mips-sdk/available-releases/
-
-```shell
-# Add toolchain to PATH
-export PATH=$PATH:/path/to/toolchain/bin
-
-# 32-bit build for mips32r5 (p5600)
-HOST=mips-mti-linux-gnu
-MIPS_CFLAGS="-O3 -mips32r5 -mabi=32 -mtune=p5600 -mmsa -mfp64 \
-  -msched-weight -mload-store-pairs -fPIE"
-MIPS_LDFLAGS="-mips32r5 -mabi=32 -mmsa -mfp64 -pie"
-
-# 64-bit build for mips64r6 (i6400)
-HOST=mips-img-linux-gnu
-MIPS_CFLAGS="-O3 -mips64r6 -mabi=64 -mtune=i6400 -mmsa -mfp64 \
-  -msched-weight -mload-store-pairs -fPIE"
-MIPS_LDFLAGS="-mips64r6 -mabi=64 -mmsa -mfp64 -pie"
-
-./configure --host=${HOST} --build=`config.guess` \
-  CC="${HOST}-gcc -EL" \
-  CFLAGS="$MIPS_CFLAGS" \
-  LDFLAGS="$MIPS_LDFLAGS"
-make
-make install
-```
-
-## CMake
-
-With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp, webpinfo
-and the JS bindings.
-
-Prerequisites: a compiler (e.g., gcc with autotools) and CMake.
-
-On a Debian-like system the following should install everything you need for a
-minimal build:
-
-```shell
-$ sudo apt-get install build-essential cmake
-```
-
-When building from git sources, you will need to run cmake to generate the
-makefiles.
-
-```shell
-mkdir build && cd build && cmake ../
-make
-make install
-```
-
-If you also want any of the executables, you will need to enable them through
-CMake, e.g.:
-
-```shell
-cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../
-```
-
-or through your favorite interface (like ccmake or cmake-qt-gui).
-
-Use option `-DWEBP_UNICODE=ON` for Unicode support on Windows (with chcp 65001).
-
-Finally, once installed, you can also use WebP in your CMake project by doing:
-
-```cmake
-find_package(WebP)
-```
-
-which will define the CMake variables WebP_INCLUDE_DIRS and WebP_LIBRARIES.
-
-## Gradle
-
-The support for Gradle is minimal: it only helps you compile libwebp, cwebp and
-dwebp and webpmux_example.
-
-Prerequisites: a compiler (e.g., gcc with autotools) and gradle.
-
-On a Debian-like system the following should install everything you need for a
-minimal build:
-
-```shell
-$ sudo apt-get install build-essential gradle
-```
-
-When building from git sources, you will need to run the Gradle wrapper with the
-appropriate target, e.g. :
-
-```shell
-./gradlew buildAllExecutables
-```
-
-## SWIG bindings
-
-To generate language bindings from swig/libwebp.swig at least swig-1.3
-(http://www.swig.org) is required.
-
-Currently the following functions are mapped:
-
-Decode:
-
-```
-WebPGetDecoderVersion
-WebPGetInfo
-WebPDecodeRGBA
-WebPDecodeARGB
-WebPDecodeBGRA
-WebPDecodeBGR
-WebPDecodeRGB
-```
-
-Encode:
-
-```
-WebPGetEncoderVersion
-WebPEncodeRGBA
-WebPEncodeBGRA
-WebPEncodeRGB
-WebPEncodeBGR
-WebPEncodeLosslessRGBA
-WebPEncodeLosslessBGRA
-WebPEncodeLosslessRGB
-WebPEncodeLosslessBGR
-```
-
-See also the [swig documentation](../swig/README.md) for more detailed build
-instructions and usage examples.
-
-### Java bindings
-
-To build the swig-generated JNI wrapper code at least JDK-1.5 (or equivalent) is
-necessary for enum support. The output is intended to be a shared object / DLL
-that can be loaded via `System.loadLibrary("webp_jni")`.
-
-### Python bindings
-
-To build the swig-generated Python extension code at least Python 2.6 is
-required. Python < 2.6 may build with some minor changes to libwebp.swig or the
-generated code, but is untested.
-
-## Javascript decoder
-
-Libwebp can be compiled into a JavaScript decoder using Emscripten and CMake.
-See the [corresponding documentation](../README.md)
--- a/doc/specs_generation.md
+++ b/doc/specs_generation.md
@@ -1,26 +0,0 @@
-# Generate libwebp Container Spec Docs from Text Source
-
-HTML generation requires [kramdown](https://kramdown.gettalong.org/), easily
-installed as a [rubygem](https://rubygems.org/). Rubygems installation should
-satisfy dependencies automatically.
-
-HTML generation can then be done from the project root:
-
-```shell
-$ kramdown doc/webp-container-spec.txt --template doc/template.html > \
-  doc/output/webp-container-spec.html
-```
-
-kramdown can optionally syntax highlight code blocks, using
-[CodeRay](https://github.com/rubychan/coderay), a dependency of kramdown that
-rubygems will install automatically. The following will apply inline CSS
-styling; an external stylesheet is not needed.
-
-```shell
-$ kramdown doc/webp-lossless-bitstream-spec.txt --template \
-  doc/template.html --coderay-css style --coderay-line-numbers ' ' \
-  --coderay-default-lang c > \
-  doc/output/webp-lossless-bitstream-spec.html
-```
-
-Optimally, use kramdown 0.13.7 or newer if syntax highlighting desired.
--- a/doc/tools.md
+++ b/doc/tools.md
@@ -1,512 +0,0 @@
-# WebP tools
-
-## Encoding tool
-
-The examples/ directory contains tools for encoding (cwebp) and decoding (dwebp)
-images.
-
-The easiest use should look like:
-
-```shell
-cwebp input.png -q 80 -o output.webp
-```
-
-which will convert the input file to a WebP file using a quality factor of 80 on
-a 0->100 scale (0 being the lowest quality, 100 being the best. Default value is
-75).
-
-You might want to try the `-lossless` flag too, which will compress the source
-(in RGBA format) without any loss. The `-q` quality parameter will in this case
-control the amount of processing time spent trying to make the output file as
-small as possible.
-
-A longer list of options is available using the `-longhelp` command line flag:
-
-```shell
-> cwebp -longhelp
-Usage:
- cwebp [-preset <...>] [options] in_file [-o out_file]
-```
-
-If input size (-s) for an image is not specified, it is assumed to be a PNG,
-JPEG, TIFF or WebP file. Note: Animated PNG and WebP files are not supported.
-
-Options:
-
-```
-h / -help ............. short help
-H / -longhelp ......... long help
-q <float> ............. quality factor (0:small..100:big), default=75
-alpha_q <int> ......... transparency-compression quality (0..100),
-                         default=100
-preset <string> ....... preset setting, one of:
-                          default, photo, picture,
-                          drawing, icon, text
-   -preset must come first, as it overwrites other parameters
-z <int> ............... activates lossless preset with given
-                         level in [0:fast, ..., 9:slowest]
-
-m <int> ............... compression method (0=fast, 6=slowest), default=4
-segments <int> ........ number of segments to use (1..4), default=4
-size <int> ............ target size (in bytes)
-psnr <float> .......... target PSNR (in dB. typically: 42)
-
-s <int> <int> ......... input size (width x height) for YUV
-sns <int> ............. spatial noise shaping (0:off, 100:max), default=50
-f <int> ............... filter strength (0=off..100), default=60
-sharpness <int> ....... filter sharpness (0:most .. 7:least sharp), default=0
-strong ................ use strong filter instead of simple (default)
-nostrong .............. use simple filter instead of strong
-sharp_yuv ............. use sharper (and slower) RGB->YUV conversion
-partition_limit <int> . limit quality to fit the 512k limit on
-                         the first partition (0=no degradation ... 100=full)
-pass <int> ............ analysis pass number (1..10)
-qrange <min> <max> .... specifies the permissible quality range
-                         (default: 0 100)
-crop <x> <y> <w> <h> .. crop picture with the given rectangle
-resize <w> <h> ........ resize picture (*after* any cropping)
-mt .................... use multi-threading if available
-low_memory ............ reduce memory usage (slower encoding)
-map <int> ............. print map of extra info
-print_psnr ............ prints averaged PSNR distortion
-print_ssim ............ prints averaged SSIM distortion
-print_lsim ............ prints local-similarity distortion
-d <file.pgm> .......... dump the compressed output (PGM file)
-alpha_method <int> .... transparency-compression method (0..1), default=1
-alpha_filter <string> . predictive filtering for alpha plane,
-                         one of: none, fast (default) or best
-exact ................. preserve RGB values in transparent area, default=off
-blend_alpha <hex> ..... blend colors against background color
-                         expressed as RGB values written in
-                         hexadecimal, e.g. 0xc0e0d0 for red=0xc0
-                         green=0xe0 and blue=0xd0
-noalpha ............... discard any transparency information
-lossless .............. encode image losslessly, default=off
-near_lossless <int> ... use near-lossless image
-                         preprocessing (0..100=off), default=100
-hint <string> ......... specify image characteristics hint,
-                         one of: photo, picture or graph
-
-metadata <string> ..... comma separated list of metadata to
-                         copy from the input to the output if present.
-                         Valid values: all, none (default), exif, icc, xmp
-
-short ................. condense printed message
-quiet ................. don't print anything
-version ............... print version number and exit
-noasm ................. disable all assembly optimizations
-v ..................... verbose, e.g. print encoding/decoding times
-progress .............. report encoding progress
-```
-
-Experimental Options:
-
-```
-jpeg_like ............. roughly match expected JPEG size
-af .................... auto-adjust filter strength
-pre <int> ............. pre-processing filter
-```
-
-The main options you might want to try in order to further tune the visual
-quality are:
-
-preset -sns -f -m
-
-Namely:
-
-*   `preset` will set up a default encoding configuration targeting a particular
-    type of input. It should appear first in the list of options, so that
-    subsequent options can take effect on top of this preset. Default value is
-    'default'.
-*   `sns` will progressively turn on (when going from 0 to 100) some additional
-    visual optimizations (like: segmentation map re-enforcement). This option
-    will balance the bit allocation differently. It tries to take bits from the
-    "easy" parts of the picture and use them in the "difficult" ones instead.
-    Usually, raising the sns value (at fixed -q value) leads to larger files,
-    but with better quality. Typical value is around '75'.
-*   `f` option directly links to the filtering strength used by the codec's
-    in-loop processing. The higher the value, the smoother the highly-compressed
-    area will look. This is particularly useful when aiming at very small files.
-    Typical values are around 20-30. Note that using the option
-    -strong/-nostrong will change the type of filtering. Use "-f 0" to turn
-    filtering off.
-*   `m` controls the trade-off between encoding speed and quality. Default is 4.
-    You can try -m 5 or -m 6 to explore more (time-consuming) encoding
-    possibilities. A lower value will result in faster encoding at the expense
-    of quality.
-
-## Decoding tool
-
-There is a decoding sample in examples/dwebp.c which will take a .webp file and
-decode it to a PNG image file (amongst other formats). This is simply to
-demonstrate the use of the API. You can verify the file test.webp decodes to
-exactly the same as test_ref.ppm by using:
-
-```shell
-cd examples
-./dwebp test.webp -ppm -o test.ppm
-diff test.ppm test_ref.ppm
-```
-
-The full list of options is available using -h:
-
-```shell
-> dwebp -h
-Usage: dwebp in_file [options] [-o out_file]
-```
-
-Decodes the WebP image file to PNG format [Default]. Note: Animated WebP files
-are not supported.
-
-Use following options to convert into alternate image formats:
-
-```
-pam ......... save the raw RGBA samples as a color PAM
-ppm ......... save the raw RGB samples as a color PPM
-bmp ......... save as uncompressed BMP format
-tiff ........ save as uncompressed TIFF format
-pgm ......... save the raw YUV samples as a grayscale PGM
-               file with IMC4 layout
-yuv ......... save the raw YUV samples in flat layout
-```
-
-Other options are:
-
-```
-version ..... print version number and exit
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-nodither .... disable dithering
-dither <d> .. dithering strength (in 0..100)
-alpha_dither  use alpha-plane dithering if needed
-mt .......... use multi-threading
-crop <x> <y> <w> <h> ... crop output with the given rectangle
-resize <w> <h> ......... resize output (*after* any cropping)
-flip ........ flip the output vertically
-alpha ....... only save the alpha plane
-incremental . use incremental decoding (useful for tests)
-h ........... this help message
-v ........... verbose (e.g. print encoding/decoding times)
-quiet ....... quiet mode, don't print anything
-noasm ....... disable all assembly optimizations
-```
-
-## WebP file analysis tool
-
-`webpinfo` can be used to print out the chunk level structure and bitstream
-header information of WebP files. It can also check if the files are of valid
-WebP format.
-
-Usage:
-
-```shell
-webpinfo [options] in_files
-```
-
-Note: there could be multiple input files; options must come before input files.
-
-Options:
-
-```
-version ........... Print version number and exit.
-quiet ............. Do not show chunk parsing information.
-diag .............. Show parsing error diagnosis.
-summary ........... Show chunk stats summary.
-bitstream_info .... Parse bitstream header.
-```
-
-## Visualization tool
-
-There's a little self-serve visualization tool called 'vwebp' under the
-examples/ directory. It uses OpenGL to open a simple drawing window and show a
-decoded WebP file. It's not yet integrated in the automake build system, but you
-can try to manually compile it using the recommendations below.
-
-Usage:
-
-```shell
-vwebp in_file [options]
-```
-
-Decodes the WebP image file and visualize it using OpenGL
-
-Options are:
-
-```
-version ..... print version number and exit
-noicc ....... don't use the icc profile if present
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-dither <int>  dithering strength (0..100), default=50
-noalphadither disable alpha plane dithering
-usebgcolor .. display background color
-mt .......... use multi-threading
-info ........ print info
-h ........... this help message
-```
-
-Keyboard shortcuts:
-
-```
-'c' ................ toggle use of color profile
-'b' ................ toggle background color display
-'i' ................ overlay file information
-'d' ................ disable blending & disposal (debug)
-'q' / 'Q' / ESC .... quit
-```
-
-### Building
-
-Prerequisites:
-
-1.  OpenGL & OpenGL Utility Toolkit (GLUT)
-
-    Linux: `sudo apt-get install freeglut3-dev mesa-common-dev`
-
-    Mac + Xcode: These libraries should be available in the OpenGL / GLUT
-    frameworks.
-
-    Windows: http://freeglut.sourceforge.net/index.php#download
-
-2.  (Optional) qcms (Quick Color Management System)
-
-    1.  Download qcms from Mozilla / Chromium:
-        https://hg.mozilla.org/mozilla-central/file/0e7639e3bdfb/gfx/qcms
-        https://source.chromium.org/chromium/chromium/src/+/main:third_party/qcms/;drc=d4a2f8e1ed461d8fc05ed88d1ae2dc94c9773825
-    2.  Build and archive the source files as libqcms.a / qcms.lib
-    3.  Update makefile.unix / Makefile.vc
-        1.  Define WEBP_HAVE_QCMS
-        2.  Update include / library paths to reference the qcms directory.
-
-Build using makefile.unix / Makefile.vc:
-
-```shell
-$ make -f makefile.unix examples/vwebp
-> nmake /f Makefile.vc CFG=release-static \
-    ../obj/x64/release-static/bin/vwebp.exe
-```
-
-## Animation creation tool
-
-The utility `img2webp` can turn a sequence of input images (PNG, JPEG, ...) into
-an animated WebP file. It offers fine control over duration, encoding modes,
-etc.
-
-Usage:
-
-```shell
-img2webp [file_options] [[frame_options] frame_file]...
-```
-
-File-level options (only used at the start of compression):
-
-```
-min_size ............ minimize size
-loop <int> .......... loop count (default: 0, = infinite loop)
-kmax <int> .......... maximum number of frame between key-frames
-                        (0=only keyframes)
-kmin <int> .......... minimum number of frame between key-frames
-                        (0=disable key-frames altogether)
-mixed ............... use mixed lossy/lossless automatic mode
-v ................... verbose mode
-h ................... this help
-version ............. print version number and exit
-```
-
-Per-frame options (only used for subsequent images input):
-
-```
-d <int> ............. frame duration in ms (default: 100)
-lossless  ........... use lossless mode (default)
-lossy ... ........... use lossy mode
-q <float> ........... quality
-m <int> ............. method to use
-```
-
-example: `img2webp -loop 2 in0.png -lossy in1.jpg -d 80 in2.tiff -o out.webp`
-
-Note: if a single file name is passed as the argument, the arguments will be
-tokenized from this file. The file name must not start with the character '-'.
-
-## Animated GIF conversion
-
-Animated GIF files can be converted to WebP files with animation using the
-gif2webp utility available under examples/. The files can then be viewed using
-vwebp.
-
-Usage:
-
-```shell
-gif2webp [options] gif_file -o webp_file
-```
-
-Options:
-
-```
-h / -help ............. this help
-lossy ................. encode image using lossy compression
-mixed ................. for each frame in the image, pick lossy
-                         or lossless compression heuristically
-q <float> ............. quality factor (0:small..100:big)
-m <int> ............... compression method (0=fast, 6=slowest)
-min_size .............. minimize output size (default:off)
-                         lossless compression by default; can be
-                         combined with -q, -m, -lossy or -mixed
-                         options
-kmin <int> ............ min distance between key frames
-kmax <int> ............ max distance between key frames
-f <int> ............... filter strength (0=off..100)
-metadata <string> ..... comma separated list of metadata to
-                         copy from the input to the output if present
-                         Valid values: all, none, icc, xmp (default)
-loop_compatibility .... use compatibility mode for Chrome
-                         version prior to M62 (inclusive)
-mt .................... use multi-threading if available
-
-version ............... print version number and exit
-v ..................... verbose
-quiet ................. don't print anything
-```
-
-### Building
-
-With the libgif development files installed, gif2webp can be built using
-makefile.unix:
-
-```shell
-$ make -f makefile.unix examples/gif2webp
-```
-
-or using autoconf:
-
-```shell
-$ ./configure --enable-everything
-$ make
-```
-
-## Comparison of animated images
-
-Test utility anim_diff under examples/ can be used to compare two animated
-images (each can be GIF or WebP).
-
-Usage:
-
-```shell
-anim_diff <image1> <image2> [options]
-```
-
-Options:
-
-```
-dump_frames <folder> dump decoded frames in PAM format
-min_psnr <float> ... minimum per-frame PSNR
-raw_comparison ..... if this flag is not used, RGB is
-                      premultiplied before comparison
-max_diff <int> ..... maximum allowed difference per channel
-                      between corresponding pixels in subsequent
-                      frames
-h .................. this help
-version ............ print version number and exit
-```
-
-### Building
-
-With the libgif development files installed, anim_diff can be built using
-makefile.unix:
-
-```shell
-$ make -f makefile.unix examples/anim_diff
-```
-
-or using autoconf:
-
-```shell
-$ ./configure --enable-everything
-$ make
-```
-
-## WebP Mux tool
-
-The examples/ directory contains a tool (webpmux) for manipulating WebP files.
-The webpmux tool can be used to create an extended format WebP file and also to
-extract or strip relevant data from such a file.
-
-A list of options is available using the -help command line flag:
-
-```shell
-> webpmux -help
-Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT
-       webpmux -set SET_OPTIONS INPUT -o OUTPUT
-       webpmux -duration DURATION_OPTIONS [-duration ...]
-               INPUT -o OUTPUT
-       webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT
-       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]
-               [-bgcolor BACKGROUND_COLOR] -o OUTPUT
-       webpmux -info INPUT
-       webpmux [-h|-help]
-       webpmux -version
-       webpmux argument_file_name
-
-GET_OPTIONS:
- Extract relevant data:
-   icc       get ICC profile
-   exif      get EXIF metadata
-   xmp       get XMP metadata
-   frame n   get nth frame
-
-SET_OPTIONS:
- Set color profile/metadata/parameters:
-   loop LOOP_COUNT            set the loop count
-   bgcolor BACKGROUND_COLOR   set the animation background color
-   icc  file.icc              set ICC profile
-   exif file.exif             set EXIF metadata
-   xmp  file.xmp              set XMP metadata
-   where:    'file.icc' contains the ICC profile to be set,
-             'file.exif' contains the EXIF metadata to be set
-             'file.xmp' contains the XMP metadata to be set
-
-DURATION_OPTIONS:
- Set duration of selected frames:
-   duration            set duration for all frames
-   duration,frame      set duration of a particular frame
-   duration,start,end  set duration of frames in the
-                        interval [start,end])
-   where: 'duration' is the duration in milliseconds
-          'start' is the start frame index
-          'end' is the inclusive end frame index
-           The special 'end' value '0' means: last frame.
-
-STRIP_OPTIONS:
- Strip color profile/metadata:
-   icc       strip ICC profile
-   exif      strip EXIF metadata
-   xmp       strip XMP metadata
-
-FRAME_OPTIONS(i):
- Create animation:
-   file_i +di[+xi+yi[+mi[bi]]]
-   where:    'file_i' is the i'th animation frame (WebP format),
-             'di' is the pause duration before next frame,
-             'xi','yi' specify the image offset for this frame,
-             'mi' is the dispose method for this frame (0 or 1),
-             'bi' is the blending method for this frame (+b or -b)
-
-LOOP_COUNT:
- Number of times to repeat the animation.
- Valid range is 0 to 65535 [Default: 0 (infinite)].
-
-BACKGROUND_COLOR:
- Background color of the canvas.
-  A,R,G,B
-  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 specifying
-            the Alpha, Red, Green and Blue component values respectively
-            [Default: 255,255,255,255]
-
-INPUT & OUTPUT are in WebP format.
-
-Note: The nature of EXIF, XMP and ICC data is not checked and is assumed to be
-valid.
-
-Note: if a single file name is passed as the argument, the arguments will be
-tokenized from this file. The file name must not start with the character '-'.
-```
--- a/doc/webp-container-spec.txt
+++ b/doc/webp-container-spec.txt
@@ -2,10 +2,10 @@

 Although you may be viewing an alternate representation, this document
 is sourced in Markdown, a light-duty markup scheme, and is optimized for
-the [kramdown](https://kramdown.gettalong.org/) transformer.
+the [kramdown](http://kramdown.rubyforge.org/) transformer.

-See the accompanying specs_generation.md. External link targets are referenced
-at the end of this file.
+See the accompanying README. External link targets are referenced at the
+end of this file.

 -->

@@ -36,7 +36,7 @@ for:
  * **Lossless compression.** An image can be losslessly compressed, using the
    WebP Lossless Format.

-  * **Metadata.** An image may have metadata stored in Exif or XMP formats.
+  * **Metadata.** An image may have metadata stored in EXIF or XMP formats.

  * **Transparency.** An image may have transparency, i.e., an alpha channel.

@@ -94,7 +94,7 @@ _1-based_
 RIFF File Format
 ----------------

-The WebP file format is based on the RIFF (Resource Interchange File Format)
+The WebP file format is based on the RIFF (Resource Interchange File Format)
 document format.

 The basic element of a RIFF file is a _chunk_. It consists of:
@@ -261,7 +261,7 @@ An extended format file consists of:

  * Image data.

-  * An optional 'EXIF' chunk with Exif metadata.
+  * An optional 'EXIF' chunk with EXIF metadata.

  * An optional 'XMP ' chunk with XMP metadata.

@@ -317,9 +317,9 @@ Alpha (L): 1 bit
 : Set if any of the frames of the image contain transparency information
  ("alpha").

-Exif metadata (E): 1 bit
+EXIF metadata (E): 1 bit

-: Set if the file contains Exif metadata.
+: Set if the file contains EXIF metadata.

 XMP metadata (X): 1 bit

@@ -341,12 +341,12 @@ Reserved: 24 bits
 Canvas Width Minus One: 24 bits

 : _1-based_ width of the canvas in pixels.
-  The actual canvas width is `1 + Canvas Width Minus One`.
+  The actual canvas width is `1 + Canvas Width Minus One`.

 Canvas Height Minus One: 24 bits

 : _1-based_ height of the canvas in pixels.
-  The actual canvas height is `1 + Canvas Height Minus One`.
+  The actual canvas height is `1 + Canvas Height Minus One`.

 The product of _Canvas Width_ and _Canvas Height_ MUST be at most `2^32 - 1`.

@@ -423,21 +423,21 @@ If the _Animation flag_ is not set, then this chunk SHOULD NOT be present.

 Frame X: 24 bits (_uint24_)

-: The X coordinate of the upper left corner of the frame is `Frame X * 2`.
+: The X coordinate of the upper left corner of the frame is `Frame X * 2`.

 Frame Y: 24 bits (_uint24_)

-: The Y coordinate of the upper left corner of the frame is `Frame Y * 2`.
+: The Y coordinate of the upper left corner of the frame is `Frame Y * 2`.

 Frame Width Minus One: 24 bits (_uint24_)

 : The _1-based_ width of the frame.
-  The frame width is `1 + Frame Width Minus One`.
+  The frame width is `1 + Frame Width Minus One`.

 Frame Height Minus One: 24 bits (_uint24_)

 : The _1-based_ height of the frame.
-  The frame height is `1 + Frame Height Minus One`.
+  The frame height is `1 + Frame Height Minus One`.

 Frame Duration: 24 bits (_uint24_)

@@ -677,12 +677,12 @@ EXIF chunk:
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |                      ChunkHeader('EXIF')                      |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    |                        Exif Metadata                          |
+    |                        EXIF Metadata                          |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

-Exif Metadata: _Chunk Size_ bytes
+EXIF Metadata: _Chunk Size_ bytes

-: image metadata in Exif format.
+: image metadata in EXIF format.

 XMP chunk:

@@ -798,7 +798,7 @@ RIFF/WEBP
 +- XMP  (metadata)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-An animated image with Exif metadata may look as follows:
+An animated image with EXIF metadata may look as follows:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 RIFF/WEBP
@@ -811,9 +811,9 @@ RIFF/WEBP
 +- EXIF (metadata)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-[vp8spec]:  https://datatracker.ietf.org/doc/html/rfc6386
+[vp8spec]:  http://tools.ietf.org/html/rfc6386
 [webpllspec]: https://chromium.googlesource.com/webm/libwebp/+/HEAD/doc/webp-lossless-bitstream-spec.txt
-[iccspec]: https://www.color.org/icc_specs2.xalter
-[metadata]: https://web.archive.org/web/20180919181934/http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
-[rfc 1166]: https://datatracker.ietf.org/doc/html/rfc1166
-[rfc 2119]: https://datatracker.ietf.org/doc/html/rfc2119
+[iccspec]: http://www.color.org/icc_specs2.xalter
+[metadata]: http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
+[rfc 1166]: http://tools.ietf.org/html/rfc1166
+[rfc 2119]: http://tools.ietf.org/html/rfc2119
--- a/doc/webp-lossless-bitstream-spec.txt
+++ b/doc/webp-lossless-bitstream-spec.txt
@@ -2,10 +2,10 @@

 Although you may be viewing an alternate representation, this document
 is sourced in Markdown, a light-duty markup scheme, and is optimized for
-the [kramdown](https://kramdown.gettalong.org/) transformer.
+the [kramdown](http://kramdown.rubyforge.org/) transformer.

-See the accompanying specs_generation.md. External link targets are referenced
-at the end of this file.
+See the accompanying README. External link targets are referenced at the
+end of this file.

 -->

@@ -16,8 +16,6 @@ _Jyrki Alakuijala, Ph.D., Google, Inc., 2012-06-19_

 Paragraphs marked as \[AMENDED\] were amended on 2014-09-16.

-Paragraphs marked as \[AMENDED2\] were amended on 2022-05-13.
-
 Abstract
 --------

@@ -27,7 +25,7 @@ exactly, including the color values for zero alpha pixels. The
 format uses subresolution images, recursively embedded into the format
 itself, for storing statistical data about the images, such as the used
 entropy codes, spatial predictors, color space conversion, and color
-table. LZ77, prefix coding, and a color cache are used for compression
+table. LZ77, Huffman coding, and a color cache are used for compression
 of the bulk data. Decoding speeds faster than PNG have been
 demonstrated, as well as 25% denser compression than can be achieved
 using today's PNG format.
@@ -65,9 +63,9 @@ distance mapping
 entropy image
 : A two-dimensional subresolution image indicating which entropy coding
  should be used in a respective square in the image, i.e., each pixel
-  is a meta prefix code.
+  is a meta Huffman code.

-prefix code
+Huffman code
 : A classic way to do entropy coding where a smaller number of bits are
  used for more frequent codes.

@@ -75,9 +73,9 @@ LZ77
 : Dictionary-based sliding window compression algorithm that either
  emits symbols or describes them as sequences of past symbols.

-meta prefix code
+meta Huffman code
 : A small integer (up to 16 bits) that indexes an element in the meta
-  prefix table.
+  Huffman table.

 predictor image
 : A two-dimensional subresolution image indicating which spatial
@@ -237,7 +235,7 @@ transform, the current pixel value is predicted from the pixels already
 decoded (in scan-line order) and only the residual value (actual -
 predicted) is encoded. The _prediction mode_ determines the type of
 prediction to use. We divide the image into squares and all the pixels
-in a square use the same prediction mode.
+in a square use the same prediction mode.

 The first 3 bits of prediction data define the block width and height in
 number of bits. The number of block columns, `block_xsize`, is used in
@@ -369,17 +367,15 @@ the predicted value for the left-topmost pixel of the image is
 0xff000000, L-pixel for all pixels on the top row, and T-pixel for all
 pixels on the leftmost column.

-\[AMENDED2\]
 Addressing the TR-pixel for pixels on the rightmost column is
 exceptional. The pixels on the rightmost column are predicted by using
-the modes \[0..13\] just like pixels not on the border, but the leftmost pixel
-on the same row as the current pixel is instead used as the TR-pixel.
+the modes \[0..13\] just like pixels not on the border, but the leftmost
+pixel on the same row as the current pixel is instead used as the TR-pixel.
+The TR-pixel offset in memory is the same for border and non-border pixels.


 ### Color Transform

-\[AMENDED2\]
-
 The goal of the color transform is to decorrelate the R, G and B values
 of each pixel. Color transform keeps the green (G) value as it is,
 transforms red (R) based on green and transforms blue (B) based on green
@@ -400,8 +396,8 @@ typedef struct {
 The actual color transformation is done by defining a color transform
 delta. The color transform delta depends on the `ColorTransformElement`,
 which is the same for all the pixels in a particular block. The delta is
-subtracted during color transform. The inverse color transform then is just
-adding those deltas.
+added during color transform. The inverse color transform then is just
+subtracting those deltas.

 The color transform function is defined as follows:

@@ -410,13 +406,13 @@ void ColorTransform(uint8 red, uint8 blue, uint8 green,
                    ColorTransformElement *trans,
                    uint8 *new_red, uint8 *new_blue) {
  // Transformed values of red and blue components
-  int tmp_red = red;
-  int tmp_blue = blue;
+  uint32 tmp_red = red;
+  uint32 tmp_blue = blue;

-  // Applying the transform is just subtracting the transform deltas
-  tmp_red  -= ColorTransformDelta(p->green_to_red_,  green);
-  tmp_blue -= ColorTransformDelta(p->green_to_blue_, green);
-  tmp_blue -= ColorTransformDelta(p->red_to_blue_, red);
+  // Applying transform is just adding the transform deltas
+  tmp_red  += ColorTransformDelta(trans->green_to_red, green);
+  tmp_blue += ColorTransformDelta(trans->green_to_blue, green);
+  tmp_blue += ColorTransformDelta(trans->red_to_blue, red);

  *new_red = tmp_red & 0xff;
  *new_blue = tmp_blue & 0xff;
@@ -434,7 +430,7 @@ int8 ColorTransformDelta(int8 t, int8 c) {
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 A conversion from the 8-bit unsigned representation (uint8) to the 8-bit
-signed one (int8) is required before calling `ColorTransformDelta()`.
+signed one (int8) is required before calling ColorTransformDelta().
 It should be performed using 8-bit two's complement (that is: uint8 range
 \[128-255\] is mapped to the \[-128, -1\] range of its converted int8 value).

@@ -472,18 +468,14 @@ channels.
 void InverseTransform(uint8 red, uint8 green, uint8 blue,
                      ColorTransformElement *p,
                      uint8 *new_red, uint8 *new_blue) {
-  // Transformed values of red and blue components
-  int tmp_red = red;
-  int tmp_blue = blue;
-
-  // Applying inverse transform is just adding the
+  // Applying inverse transform is just subtracting the
  // color transform deltas
-  tmp_red  += ColorTransformDelta(trans->green_to_red, green);
-  tmp_blue += ColorTransformDelta(trans->green_to_blue, green);
-  tmp_blue += ColorTransformDelta(trans->red_to_blue, tmp_red & 0xff);
+  red  -= ColorTransformDelta(p->green_to_red_,  green);
+  blue -= ColorTransformDelta(p->green_to_blue_, green);
+  blue -= ColorTransformDelta(p->red_to_blue_, red & 0xff);

-  *new_red = tmp_red & 0xff;
-  *new_blue = tmp_blue & 0xff;
+  *new_red = red & 0xff;
+  *new_blue = blue & 0xff;
 }
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -598,12 +590,12 @@ The values are packed into the green component as follows:
    4 most-significant bits of the green value at x / 2.
  * `width_bits` = 2: for every x value where x ≡ 0 (mod 4), a green
    value at x is positioned into the 2 least-significant bits of the
-    green value at x / 4, green values at x + 1 to x + 3 are positioned in order
-    to the more significant bits of the green value at x / 4.
+    green value at x / 4, green values at x + 1 to x + 3 are positioned in
+    order to the more significant bits of the green value at x / 4.
  * `width_bits` = 3: for every x value where x ≡ 0 (mod 8), a green
    value at x is positioned into the least-significant bit of the green
-    value at x / 8, green values at x + 1 to x + 7 are positioned in order to
-    the more significant bits of the green value at x / 8.
+    value at x / 8, green values at x + 1 to x + 7 are positioned in order to
+    the more significant bits of the green value at x / 8.


 4 Image Data
@@ -617,8 +609,8 @@ We use image data in five different roles:

  1. ARGB image: Stores the actual pixels of the image.
  1. Entropy image: Stores the
-     [meta prefix codes](#decoding-of-meta-prefix-codes). The red and green
-     components of a pixel define the meta prefix code used in a particular
+     [meta Huffman codes](#decoding-of-meta-huffman-codes). The red and green
+     components of a pixel define the meta Huffman code used in a particular
     block of the ARGB image.
  1. Predictor image: Stores the metadata for [Predictor
     Transform](#predictor-transform). The green component of a pixel defines
@@ -629,7 +621,7 @@ We use image data in five different roles:
     the image. Each `ColorTransformElement` `'cte'` is treated as a pixel whose
     alpha component is `255`, red component is `cte.red_to_blue`, green
     component is `cte.green_to_blue` and blue component is `cte.green_to_red`.
-  1. Color indexing image: An array of size `color_table_size` (up to 256
+  1. Color indexing image: An array of size `color_table_size` (up to 256
     ARGB values) storing the metadata for the
     [Color Indexing Transform](#color-indexing-transform). This is stored as an
     image of width `color_table_size` and height `1`.
@@ -651,7 +643,7 @@ the image.

 Each pixel is encoded using one of the three possible methods:

-  1. prefix coded literal: each channel (green, red, blue and alpha) is
+  1. Huffman coded literal: each channel (green, red, blue and alpha) is
     entropy-coded independently;
  2. LZ77 backward reference: a sequence of pixels are copied from elsewhere
     in the image; or
@@ -660,9 +652,9 @@ Each pixel is encoded using one of the three possible methods:

 The following sub-sections describe each of these in detail.

-#### 4.2.1 Prefix Coded Literals
+#### 4.2.1 Huffman Coded Literals

-The pixel is stored as prefix coded values of green, red, blue and alpha (in
+The pixel is stored as Huffman coded values of green, red, blue and alpha (in
 that order). See [this section](#decoding-entropy-coded-image-data) for details.

 #### 4.2.2 LZ77 Backward Reference
@@ -686,7 +678,7 @@ very few values in the image. Thus, this approach results in a better
 compression overall.

 The following table denotes the prefix codes and extra bits used for storing
-different ranges of values.
+different ranges of values.

 Note: The maximum backward reference length is limited to 4096. Hence, only the
 first 24 prefix codes (with the respective extra bits) are meaningful for length
@@ -761,13 +753,13 @@ The mapping between distance code `i` and the neighboring pixel offset
 (-6, 7), (7, 6),  (-7, 6), (8, 5),  (7, 7),  (-7, 7), (8, 6),  (8, 7)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-For example, distance code `1` indicates an offset of `(0, 1)` for the
-neighboring pixel, that is, the pixel above the current pixel (0 pixel
-difference in X-direction and 1 pixel difference in Y-direction). Similarly,
-distance code `3` indicates left-top pixel.
+For example, distance code `1` indicates an offset of `(0, 1)` for the
+neighboring pixel, that is, the pixel above the current pixel (0 pixel
+difference in X-direction and 1 pixel difference in Y-direction). Similarly,
+distance code `3` indicates the left-top pixel.

-The decoder can convert a distance code `i` to a scan-line order distance
-`dist` as follows:
+The decoder can convert a distance code `i` to a scan-line order distance
+`dist` as follows:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 (xi, yi) = distance_map[i]
@@ -777,22 +769,21 @@ if (dist < 1) {
 }
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-where `distance_map` is the mapping noted above and `xsize` is the width of the
+where `distance_map` is the mapping noted above and `xsize` is the width of the
 image in pixels.


 #### 4.2.3 Color Cache Coding
-{:#color-cache-code}

 Color cache stores a set of colors that have been recently used in the image.

 **Rationale:** This way, the recently used colors can sometimes be referred to
-more efficiently than emitting them using the other two methods (described in
-[4.2.1](#prefix-coded-literals) and [4.2.2](#lz77-backward-reference)).
+more efficiently than emitting them using the other two methods (described in
+[4.2.1](#huffman-coded-literals) and [4.2.2](#lz77-backward-reference)).

 Color cache codes are stored as follows. First, there is a 1-bit value that
 indicates if the color cache is used. If this bit is 0, no color cache codes
-exist, and they are not transmitted in the prefix code that decodes the green
+exist, and they are not transmitted in the Huffman code that decodes the green
 symbols and the length prefix codes. However, if this bit is 1, the color cache
 size is read next:

@@ -823,245 +814,130 @@ literals, into the cache in the order they appear in the stream.

 ### 5.1 Overview

-Most of the data is coded using a [canonical prefix code][canonical_huff].
-Hence, the codes are transmitted by sending the _prefix code lengths_, as
-opposed to the actual _prefix codes_.
+Most of the data is coded using a [canonical Huffman code][canonical_huff].
+Hence, the codes are transmitted by sending the _Huffman code lengths_, as
+opposed to the actual _Huffman codes_.

-In particular, the format uses **spatially-variant prefix coding**. In other
+In particular, the format uses **spatially-variant Huffman coding**. In other
 words, different blocks of the image can potentially use different entropy
 codes.

-**Rationale**: Different areas of the image may have different characteristics.
-So, allowing them to use different entropy codes provides more flexibility and
-potentially better compression.
+**Rationale**: Different areas of the image may have different characteristics. So, allowing them to use different entropy codes provides more flexibility and
+potentially better compression.

 ### 5.2 Details

-The encoded image data consists of several parts:
+The encoded image data consists of two parts:

-  1. Decoding and building the prefix codes \[AMENDED2\]
-  1. Meta prefix codes
+  1. Meta Huffman codes
  1. Entropy-coded image data

-#### 5.2.1 Decoding and Building the Prefix Codes
+#### 5.2.1 Decoding of Meta Huffman Codes

-There are several steps in decoding the prefix codes.
+As noted earlier, the format allows the use of different Huffman codes for
+different blocks of the image. _Meta Huffman codes_ are indexes identifying
+which Huffman codes to use in different parts of the image.

-**Decoding the Code Lengths:**
-{:#decoding-the-code-lengths}
-
-This section describes how to read the prefix code lengths from the bitstream.
-
-The prefix code lengths can be coded in two ways. The method used is specified
-by a 1-bit value.
-
-  * If this bit is 1, it is a _simple code length code_, and
-  * If this bit is 0, it is a _normal code length code_.
-
-In both cases, there can be unused code lengths that are still part of the
-stream. This may be inefficient, but it is allowed by the format.
-
-**(i) Simple Code Length Code:**
-
-\[AMENDED2\]
-
-This variant is used in the special case when only 1 or 2 prefix symbols are
-in the range \[0..255\] with code length `1`. All other prefix code lengths
-are implicitly zeros.
-
-The first bit indicates the number of symbols:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int num_symbols = ReadBits(1) + 1;
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Following are the symbol values.
-This first symbol is coded using 1 or 8 bits depending on the value of
-`is_first_8bits`. The range is \[0..1\] or \[0..255\], respectively.
-The second symbol, if present, is always assumed to be in the range \[0..255\]
-and coded using 8 bits.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int is_first_8bits = ReadBits(1);
-symbol0 = ReadBits(1 + 7 * is_first_8bits);
-code_lengths[symbol0] = 1;
-if (num_symbols == 2) {
-  symbol1 = ReadBits(8);
-  code_lengths[symbol1] = 1;
-}
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-**Note:** Another special case is when _all_ prefix code lengths are _zeros_
-(an empty prefix code). For example, a prefix code for distance can be empty
-if there are no backward references. Similarly, prefix codes for alpha, red,
-and blue can be empty if all pixels within the same meta prefix code are
-produced using the color cache. However, this case doesn't need a special
-handling, as empty prefix codes can be coded as those containing a single
-symbol `0`.
-
-**(ii) Normal Code Length Code:**
-
-The code lengths of the prefix code fit in 8 bits and are read as follows.
-First, `num_code_lengths` specifies the number of code lengths.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int num_code_lengths = 4 + ReadBits(4);
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-If `num_code_lengths` is > 18, the bitstream is invalid.
-
-The code lengths are themselves encoded using prefix codes: lower level code
-lengths `code_length_code_lengths` first have to be read. The rest of those
-`code_length_code_lengths` (according to the order in `kCodeLengthCodeOrder`)
-are zeros.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int kCodeLengthCodes = 19;
-int kCodeLengthCodeOrder[kCodeLengthCodes] = {
-  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-};
-int code_length_code_lengths[kCodeLengthCodes] = { 0 };  // All zeros.
-for (i = 0; i < num_code_lengths; ++i) {
-  code_length_code_lengths[kCodeLengthCodeOrder[i]] = ReadBits(3);
-}
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Next, if `ReadBits(1) == 0`, the maximum number of different read symbols is
-`num_code_lengths`. Otherwise, it is defined as:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int length_nbits = 2 + 2 * ReadBits(3);
-int max_symbol = 2 + ReadBits(length_nbits);
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-A prefix table is then built from `code_length_code_lengths` and used to read
-up to `max_symbol` code lengths.
-
-  * Code \[0..15\] indicates literal code lengths.
-    * Value 0 means no symbols have been coded.
-    * Values \[1..15\] indicate the bit length of the respective code.
-  * Code 16 repeats the previous non-zero value \[3..6\] times, i.e.,
-    `3 + ReadBits(2)` times. If code 16 is used before a non-zero
-    value has been emitted, a value of 8 is repeated.
-  * Code 17 emits a streak of zeros \[3..10\], i.e., `3 + ReadBits(3)`
-    times.
-  * Code 18 emits a streak of zeros of length \[11..138\], i.e.,
-    `11 + ReadBits(7)` times.
-
-Once code lengths are read, a prefix code for each symbol type (A, R, G, B,
-distance) is formed using their respective alphabet sizes:
-
-  * G channel: 256 + 24 + `color_cache_size`
-  * other literals (A,R,B): 256
-  * distance code: 40
-
-#### 5.2.2 Decoding of Meta Prefix Codes
-
-As noted earlier, the format allows the use of different prefix codes for
-different blocks of the image. _Meta prefix codes_ are indexes identifying
-which prefix codes to use in different parts of the image.
-
-Meta prefix codes may be used _only_ when the image is being used in the
+Meta Huffman codes may be used _only_ when the image is being used in the
 [role](#roles-of-image-data) of an _ARGB image_.

-There are two possibilities for the meta prefix codes, indicated by a 1-bit
+There are two possibilities for the meta Huffman codes, indicated by a 1-bit
 value:

-  * If this bit is zero, there is only one meta prefix code used everywhere in
+  * If this bit is zero, there is only one meta Huffman code used everywhere in
    the image. No more data is stored.
-  * If this bit is one, the image uses multiple meta prefix codes. These meta
-    prefix codes are stored as an _entropy image_ (described below).
+  * If this bit is one, the image uses multiple meta Huffman codes. These meta
+    Huffman codes are stored as an _entropy image_ (described below).

 **Entropy image:**

-The entropy image defines which prefix codes are used in different parts of the
+The entropy image defines which Huffman codes are used in different parts of the
 image, as described below.

-The first 3-bits contain the `prefix_bits` value. The dimensions of the entropy
-image are derived from 'prefix_bits'.
+The first 3-bits contain the `huffman_bits` value. The dimensions of the entropy
+image are derived from `huffman_bits`.

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int prefix_bits = ReadBits(3) + 2;
-int prefix_xsize = DIV_ROUND_UP(xsize, 1 << prefix_bits);
-int prefix_ysize = DIV_ROUND_UP(ysize, 1 << prefix_bits);
+int huffman_bits = ReadBits(3) + 2;
+int huffman_xsize = DIV_ROUND_UP(xsize, 1 << huffman_bits);
+int huffman_ysize = DIV_ROUND_UP(ysize, 1 << huffman_bits);
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 where `DIV_ROUND_UP` is as defined [earlier](#predictor-transform).

-The next bits contain an entropy image of width `prefix_xsize` and height
-`prefix_ysize`.
+The next bits contain an entropy image of width `huffman_xsize` and height
+`huffman_ysize`.

-**Interpretation of Meta Prefix Codes:**
+**Interpretation of Meta Huffman Codes:**

-For any given pixel (x, y), there is a set of five prefix codes associated with
+For any given pixel (x, y), there is a set of five Huffman codes associated with
 it. These codes are (in bitstream order):

-  * **prefix code #1**: used for green channel, backward-reference length and
+  * **Huffman code #1**: used for green channel, backward-reference length and
    color cache
-  * **prefix code #2, #3 and #4**: used for red, blue and alpha channels
+  * **Huffman code #2, #3 and #4**: used for red, blue and alpha channels
    respectively.
-  * **prefix code #5**: used for backward-reference distance.
+  * **Huffman code #5**: used for backward-reference distance.

-From here on, we refer to this set as a **prefix code group**.
+From here on, we refer to this set as a **Huffman code group**.

-The number of prefix code groups in the ARGB image can be obtained by finding
-the _largest meta prefix code_ from the entropy image:
+The number of Huffman code groups in the ARGB image can be obtained by finding
+the _largest meta Huffman code_ from the entropy image:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int num_prefix_groups = max(entropy image) + 1;
+int num_huff_groups = max(entropy image) + 1;
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-where `max(entropy image)` indicates the largest prefix code stored in the
+where `max(entropy image)` indicates the largest Huffman code stored in the
 entropy image.

-As each prefix code group contains five prefix codes, the total number of
-prefix codes is:
+As each Huffman code group contains five Huffman codes, the total number of
+Huffman codes is:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int num_prefix_codes = 5 * num_prefix_groups;
+int num_huff_codes = 5 * num_huff_groups;
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Given a pixel (x, y) in the ARGB image, we can obtain the corresponding prefix
+Given a pixel (x, y) in the ARGB image, we can obtain the corresponding Huffman
 codes to be used as follows:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-int position = (y >> prefix_bits) * prefix_xsize + (x >> prefix_bits);
-int meta_prefix_code = (entropy_image[pos] >> 8) & 0xffff;
-PrefixCodeGroup prefix_group = prefix_code_groups[meta_prefix_code];
+int position = (y >> huffman_bits) * huffman_xsize + (x >> huffman_bits);
+int meta_huff_code = (entropy_image[position] >> 8) & 0xffff;
+HuffmanCodeGroup huff_group = huffman_code_groups[meta_huff_code];
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-where, we have assumed the existence of `PrefixCodeGroup` structure, which
-represents a set of five prefix codes. Also, `prefix_code_groups` is an array
-of `PrefixCodeGroup` (of size `num_prefix_groups`).
+where, we have assumed the existence of `HuffmanCodeGroup` structure, which
+represents a set of five Huffman codes. Also, `huffman_code_groups` is an array
+of `HuffmanCodeGroup` (of size `num_huff_groups`).

-The decoder then uses prefix code group `prefix_group` to decode the pixel
+The decoder then uses Huffman code group `huff_group` to decode the pixel
 (x, y) as explained in the [next section](#decoding-entropy-coded-image-data).

-#### 5.2.3 Decoding Entropy-coded Image Data
-
-\[AMENDED2\]
+#### 5.2.2 Decoding Entropy-coded Image Data

 For the current position (x, y) in the image, the decoder first identifies the
-corresponding prefix code group (as explained in the last section). Given the
-prefix code group, the pixel is read and decoded as follows:
+corresponding Huffman code group (as explained in the last section). Given the
+Huffman code group, the pixel is read and decoded as follows:

-Read next symbol S from the bitstream using prefix code #1. Note that S is any
-integer in the range `0` to
-`(256 + 24 + ` [`color_cache_size`](#color-cache-code)` - 1)`.
+Read next symbol S from the bitstream using Huffman code #1. \[See
+[next section](#decoding-the-code-lengths) for details on decoding the Huffman
+code lengths\]. Note that S is any integer in the range `0` to
+`(256 + 24 + ` [`color_cache_size`](#color-cache-code)` - 1)`.

 The interpretation of S depends on its value:

  1. if S < 256
-     1. Use S as the green component.
-     1. Read red from the bitstream using prefix code #2.
-     1. Read blue from the bitstream using prefix code #3.
-     1. Read alpha from the bitstream using prefix code #4.
-  1. if S >= 256 && S < 256 + 24
-     1. Use S - 256 as a length prefix code.
-     1. Read extra bits for length from the bitstream.
+     1. Use S as the green component
+     1. Read red from the bitstream using Huffman code #2
+     1. Read blue from the bitstream using Huffman code #3
+     1. Read alpha from the bitstream using Huffman code #4
+  1. if S < 256 + 24
+     1. Use S - 256 as a length prefix code
+     1. Read extra bits for length from the bitstream
     1. Determine backward-reference length L from length prefix code and the
        extra bits read.
-     1. Read distance prefix code from the bitstream using prefix code #5.
-     1. Read extra bits for distance from the bitstream.
+     1. Read distance prefix code from the bitstream using Huffman code #5
+     1. Read extra bits for distance from the bitstream
     1. Determine backward-reference distance D from distance prefix code and
        the extra bits read.
     1. Copy the L pixels (in scan-line order) from the sequence of pixels
@@ -1071,6 +947,80 @@ The interpretation of S depends on its value:
     1. Get ARGB color from the color cache at that index.


+**Decoding the Code Lengths:**
+{:#decoding-the-code-lengths}
+
+This section describes the details about reading a symbol from the bitstream by
+decoding the Huffman code length.
+
+The Huffman code lengths can be coded in two ways. The method used is specified
+by a 1-bit value.
+
+  * If this bit is 1, it is a _simple code length code_, and
+  * If this bit is 0, it is a _normal code length code_.
+
+**(i) Simple Code Length Code:**
+
+This variant is used in the special case when only 1 or 2 Huffman code lengths
+are non-zero, and are in the range of \[0, 255\]. All other Huffman code lengths
+are implicitly zeros.
+
+The first bit indicates the number of non-zero code lengths:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+int num_code_lengths = ReadBits(1) + 1;
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The first code length is stored either using a 1-bit code for values of 0 and 1,
+or using an 8-bit code for values in range \[0, 255\]. The second code length,
+when present, is coded as an 8-bit code.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+int is_first_8bits = ReadBits(1);
+code_lengths[0] = ReadBits(1 + 7 * is_first_8bits);
+if (num_code_lengths == 2) {
+  code_lengths[1] = ReadBits(8);
+}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Note:** Another special case is when _all_ Huffman code lengths are _zeros_
+(an empty Huffman code). For example, a Huffman code for distance can be empty
+if there are no backward references. Similarly, Huffman codes for alpha, red,
+and blue can be empty if all pixels within the same meta Huffman code are
+produced using the color cache. However, this case doesn't need a special
+handling, as empty Huffman codes can be coded as those containing a single
+symbol `0`.
+
+**(ii) Normal Code Length Code:**
+
+The code lengths of a Huffman code are read as follows: `num_code_lengths`
+specifies the number of code lengths; the rest of the code lengths
+(according to the order in `kCodeLengthCodeOrder`) are zeros.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+int kCodeLengthCodes = 19;
+int kCodeLengthCodeOrder[kCodeLengthCodes] = {
+  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+int code_lengths[kCodeLengthCodes] = { 0 };  // All zeros.
+int num_code_lengths = 4 + ReadBits(4);
+for (i = 0; i < num_code_lengths; ++i) {
+  code_lengths[kCodeLengthCodeOrder[i]] = ReadBits(3);
+}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  * Code length code \[0..15\] indicates literal code lengths.
+    * Value 0 means no symbols have been coded.
+    * Values \[1..15\] indicate the bit length of the respective code.
+  * Code 16 repeats the previous non-zero value \[3..6\] times, i.e.,
+    3 + `ReadBits(2)` times.  If code 16 is used before a non-zero
+    value has been emitted, a value of 8 is repeated.
+  * Code 17 emits a streak of zeros \[3..10\], i.e., 3 + `ReadBits(3)`
+    times.
+  * Code 18 emits a streak of zeros of length \[11..138\], i.e.,
+    11 + `ReadBits(7)` times.
+
+
 6 Overall Structure of the Format
 ---------------------------------

@@ -1106,26 +1056,23 @@ of pixels (xsize * ysize).

 #### Structure of the Image Data

-\[AMENDED2\]
-
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-<spatially-coded image> ::= <color cache info><meta prefix><data>
-<entropy-coded image> ::= <color cache info><data>
+<spatially-coded image> ::= <meta huffman><entropy-coded image>
+<entropy-coded image> ::= <color cache info><huffman codes><lz77-coded image>
+<meta huffman> ::= 1-bit value 0 |
+                   (1-bit value 1; <entropy image>)
+<entropy image> ::= 3-bit subsample value; <entropy-coded image>
 <color cache info> ::= 1 bit value 0 |
                       (1-bit value 1; 4-bit value for color cache size)
-<meta prefix> ::= 1-bit value 0 |
-                  (1-bit value 1; <entropy image>)
-<data> ::= <prefix codes><lz77-coded image>
-<entropy image> ::= 3-bit subsample value; <entropy-coded image>
-<prefix codes> ::= <prefix code group> | <prefix code group><prefix codes>
-<prefix code group> ::= <prefix code><prefix code><prefix code>
-                        <prefix code><prefix code>
-                        See "Interpretation of Meta Prefix Codes" to
-                        understand what each of these five prefix codes are
-                        for.
-<prefix code> ::= <simple prefix code> | <normal prefix code>
-<simple prefix code> ::= see "Simple code length code" for details
-<normal prefix code> ::= <code length code>; encoded code lengths
+<huffman codes> ::= <huffman code group> | <huffman code group><huffman codes>
+<huffman code group> ::= <huffman code><huffman code><huffman code>
+                         <huffman code><huffman code>
+                         See "Interpretation of Meta Huffman codes" to
+                         understand what each of these five Huffman codes are
+                         for.
+<huffman code> ::= <simple huffman code> | <normal huffman code>
+<simple huffman code> ::= see "Simple code length code" for details
+<normal huffman code> ::= <code length code>; encoded code lengths
 <code length code> ::= see section "Normal code length code"
 <lz77-coded image> ::= ((<argb-pixel> | <lz77-copy> | <color-cache-code>)
                       <lz77-coded image>) | ""
@@ -1135,8 +1082,9 @@ A possible example sequence:

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 <RIFF header><image size>1-bit value 1<subtract-green-tx>
-1-bit value 1<predictor-tx>1-bit value 0<color cache info>1-bit value 0
-<prefix codes><lz77-coded image>
+1-bit value 1<predictor-tx>1-bit value 0<meta huffman>
+<color cache info><huffman codes>
+<lz77-coded image>
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-[canonical_huff]: https://en.wikipedia.org/wiki/Canonical_Huffman_code
+[canonical_huff]: http://en.wikipedia.org/wiki/Canonical_Huffman_code
--- a/examples/anim_util.c
+++ b/examples/anim_util.c
@@ -241,7 +241,7 @@ static int ReadAnimatedWebP(const char filename[],
  image->bgcolor = anim_info.bgcolor;

  // Allocate frames.
-  if (!AllocateFrames(image, anim_info.frame_count)) goto End;
+  if (!AllocateFrames(image, anim_info.frame_count)) return 0;

  // Decode frames.
  while (WebPAnimDecoderHasMoreFrames(dec)) {
@@ -558,10 +558,7 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
    }
  }
  // Allocate frames.
-  if (!AllocateFrames(image, frame_count)) {
-    DGifCloseFile(gif, NULL);
-    return 0;
-  }
+  AllocateFrames(image, frame_count);

  canvas_width = image->canvas_width;
  canvas_height = image->canvas_height;
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -571,7 +571,7 @@ static void HelpLong(void) {
  printf("  -qrange <min> <max> .... specifies the permissible quality range\n"
         "                           (default: 0 100)\n");
  printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
-  printf("  -resize <w> <h> ........ resize picture (*after* any cropping)\n");
+  printf("  -resize <w> <h> ........ resize picture (after any cropping)\n");
  printf("  -mt .................... use multi-threading if available\n");
  printf("  -low_memory ............ reduce memory usage (slower encoding)\n");
  printf("  -map <int> ............. print map of extra info\n");
@@ -620,7 +620,6 @@ static void HelpLong(void) {
  printf("  -af .................... auto-adjust filter strength\n");
  printf("  -pre <int> ............. pre-processing filter\n");
  printf("\n");
-  printf("Supported input formats:\n  %s\n", WebPGetEnabledInputFileFormats());
 }

 //------------------------------------------------------------------------------
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -96,7 +96,7 @@ static void Help(void) {
         "  -alpha_dither  use alpha-plane dithering if needed\n"
         "  -mt .......... use multi-threading\n"
         "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
-         "  -resize <w> <h> ......... resize output (*after* any cropping)\n"
+         "  -resize <w> <h> ......... scale the output (*after* any cropping)\n"
         "  -flip ........ flip the output vertically\n"
         "  -alpha ....... only save the alpha plane\n"
         "  -incremental . use incremental decoding (useful for tests)\n"
--- a/examples/example_util.c
+++ b/examples/example_util.c
@@ -103,10 +103,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
    }
    args->own_argv_ = 1;
    args->argv_ = (const char**)WebPMalloc(MAX_ARGC * sizeof(*args->argv_));
-    if (args->argv_ == NULL) {
-      ExUtilDeleteCommandLineArguments(args);
-      return 0;
-    }
+    if (args->argv_ == NULL) return 0;

    argc = 0;
    for (cur = strtok((char*)args->argv_data_.bytes, sep);
@@ -114,7 +111,6 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
         cur = strtok(NULL, sep)) {
      if (argc == MAX_ARGC) {
        fprintf(stderr, "ERROR: Arguments limit %d reached\n", MAX_ARGC);
-        ExUtilDeleteCommandLineArguments(args);
        return 0;
      }
      assert(strlen(cur) != 0);
--- a/examples/gif2webp.c
+++ b/examples/gif2webp.c
@@ -314,11 +314,8 @@ int main(int argc, const char* argv[]) {
          frame.use_argb = 1;
          if (!WebPPictureAlloc(&frame)) goto End;
          GIFClearPic(&frame, NULL);
-          if (!(WebPPictureCopy(&frame, &curr_canvas) &&
-                WebPPictureCopy(&frame, &prev_canvas))) {
-            fprintf(stderr, "Error allocating canvas.\n");
-            goto End;
-          }
+          WebPPictureCopy(&frame, &curr_canvas);
+          WebPPictureCopy(&frame, &prev_canvas);

          // Background color.
          GIFGetBackgroundColor(gif->SColorMap, gif->SBackGroundColor,
--- a/examples/img2webp.c
+++ b/examples/img2webp.c
@@ -35,7 +35,8 @@

 static void Help(void) {
  printf("Usage:\n\n");
-  printf("  img2webp [file_options] [[frame_options] frame_file]...\n");
+  printf("  img2webp [file-level options] [image files...] "
+         "[per-frame options...]\n");
  printf("\n");

  printf("File-level options (only used at the start of compression):\n");
@@ -65,8 +66,6 @@ static void Help(void) {
         "arguments will be\n");
  printf("tokenized from this file. The file name must not start with "
         "the character '-'.\n");
-  printf("\nSupported input formats:\n  %s\n",
-         WebPGetEnabledInputFileFormats());
 }

 //------------------------------------------------------------------------------
@@ -188,7 +187,7 @@ int main(int argc, const char* argv[]) {
        verbose = 1;
      } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
        Help();
-        FREE_WARGV_AND_RETURN(0);
+        goto End;
      } else if (!strcmp(argv[c], "-version")) {
        const int enc_version = WebPGetEncoderVersion();
        const int mux_version = WebPGetMuxVersion();
--- a/examples/unicode.h
+++ b/examples/unicode.h
@@ -16,15 +16,11 @@
 #ifndef WEBP_EXAMPLES_UNICODE_H_
 #define WEBP_EXAMPLES_UNICODE_H_

-#include <stdio.h>
-
 #if defined(_WIN32) && defined(_UNICODE)

 // wchar_t is used instead of TCHAR because we only perform additional work when
 // Unicode is enabled and because the output of CommandLineToArgvW() is wchar_t.

-#include <fcntl.h>
-#include <io.h>
 #include <wchar.h>
 #include <windows.h>
 #include <shellapi.h>
@@ -59,16 +55,8 @@

 #define WFOPEN(ARG, OPT) _wfopen((const W_CHAR*)ARG, TO_W_CHAR(OPT))

-#define WFPRINTF(STREAM, STR, ...)                    \
-  do {                                                \
-    int prev_mode;                                    \
-    fflush(STREAM);                                   \
-    prev_mode = _setmode(_fileno(STREAM), _O_U8TEXT); \
-    fwprintf(STREAM, TO_W_CHAR(STR), __VA_ARGS__);    \
-    fflush(STREAM);                                   \
-    (void)_setmode(_fileno(STREAM), prev_mode);       \
-  } while (0)
-#define WPRINTF(STR, ...) WFPRINTF(stdout, STR, __VA_ARGS__)
+#define WPRINTF(STR, ...) wprintf(TO_W_CHAR(STR), __VA_ARGS__)
+#define WFPRINTF(STDERR, STR, ...) fwprintf(STDERR, TO_W_CHAR(STR), __VA_ARGS__)

 #define WSTRLEN(FILENAME) wcslen((const W_CHAR*)FILENAME)
 #define WSTRCMP(FILENAME, STR) wcscmp((const W_CHAR*)FILENAME, TO_W_CHAR(STR))
@@ -77,8 +65,6 @@

 #else

-#include <string.h>
-
 // Unicode file paths work as is on Unix platforms, and no extra work is done on
 // Windows either if Unicode is disabled.

@@ -97,7 +83,7 @@
 #define WFOPEN(ARG, OPT) fopen(ARG, OPT)

 #define WPRINTF(STR, ...) printf(STR, __VA_ARGS__)
-#define WFPRINTF(STREAM, STR, ...) fprintf(STREAM, STR, __VA_ARGS__)
+#define WFPRINTF(STDERR, STR, ...) fprintf(STDERR, STR, __VA_ARGS__)

 #define WSTRLEN(FILENAME) strlen(FILENAME)
 #define WSTRCMP(FILENAME, STR) strcmp(FILENAME, STR)
--- a/examples/unicode_gif.h
+++ b/examples/unicode_gif.h
@@ -45,19 +45,18 @@ static GifFileType* DGifOpenFileUnicode(const W_CHAR* file_name, int* error) {
  }

 #if defined(_WIN32) && defined(_UNICODE)
-  {
-    int file_handle = _wopen(file_name, _O_RDONLY | _O_BINARY);
-    if (file_handle == -1) {
-      if (error != NULL) *error = D_GIF_ERR_OPEN_FAILED;
-      return NULL;
-    }
+
+  int file_handle = _wopen(file_name, _O_RDONLY | _O_BINARY);
+  if (file_handle == -1) {
+    if (error != NULL) *error = D_GIF_ERR_OPEN_FAILED;
+    return NULL;
+  }

 #if LOCAL_GIF_PREREQ(5, 0)
-    return DGifOpenFileHandle(file_handle, error);
+  return DGifOpenFileHandle(file_handle, error);
 #else
-    return DGifOpenFileHandle(file_handle);
+  return DGifOpenFileHandle(file_handle);
 #endif
-  }

 #else

--- a/examples/vwebp.c
+++ b/examples/vwebp.c
@@ -292,19 +292,6 @@ static void PrintString(const char* const text) {
  }
 }

-static void PrintStringW(const char* const text) {
-#if defined(_WIN32) && defined(_UNICODE)
-  void* const font = GLUT_BITMAP_9_BY_15;
-  const W_CHAR* const wtext = (const W_CHAR*)text;
-  int i;
-  for (i = 0; wtext[i]; ++i) {
-    glutBitmapCharacter(font, wtext[i]);
-  }
-#else
-  PrintString(text);
-#endif
-}
-
 static float GetColorf(uint32_t color, int shift) {
  return ((color >> shift) & 0xff) / 255.f;
 }
@@ -409,7 +396,7 @@ static void HandleDisplay(void) {

    glColor4f(0.90f, 0.0f, 0.90f, 1.0f);
    glRasterPos2f(-0.95f, 0.90f);
-    PrintStringW(kParams.file_name);
+    PrintString(kParams.file_name);

    snprintf(tmp, sizeof(tmp), "Dimension:%d x %d", pic->width, pic->height);
    glColor4f(0.90f, 0.0f, 0.90f, 1.0f);
--- a/examples/webpinfo.c
+++ b/examples/webpinfo.c
@@ -125,16 +125,16 @@ static void WebPInfoInit(WebPInfo* const webp_info) {
  memset(webp_info, 0, sizeof(*webp_info));
 }

-static const uint32_t kWebPChunkTags[CHUNK_TYPES] = {
-  MKFOURCC('V', 'P', '8', ' '),
-  MKFOURCC('V', 'P', '8', 'L'),
-  MKFOURCC('V', 'P', '8', 'X'),
-  MKFOURCC('A', 'L', 'P', 'H'),
-  MKFOURCC('A', 'N', 'I', 'M'),
-  MKFOURCC('A', 'N', 'M', 'F'),
-  MKFOURCC('I', 'C', 'C', 'P'),
-  MKFOURCC('E', 'X', 'I', 'F'),
-  MKFOURCC('X', 'M', 'P', ' '),
+static const char kWebPChunkTags[CHUNK_TYPES][4] = {
+  { 'V', 'P', '8', ' ' },
+  { 'V', 'P', '8', 'L' },
+  { 'V', 'P', '8', 'X' },
+  { 'A', 'L', 'P', 'H' },
+  { 'A', 'N', 'I', 'M' },
+  { 'A', 'N', 'M', 'F' },
+  { 'I', 'C', 'C', 'P' },
+  { 'E', 'X', 'I', 'F' },
+  { 'X', 'M', 'P', ' ' },
 };

 // -----------------------------------------------------------------------------
@@ -644,7 +644,7 @@ static WebPInfoStatus ParseChunk(const WebPInfo* const webp_info,
      return WEBP_INFO_TRUNCATED_DATA;
    }
    for (i = 0; i < CHUNK_TYPES; ++i) {
-      if (kWebPChunkTags[i] == fourcc) break;
+      if (!memcmp(kWebPChunkTags[i], &fourcc, TAG_SIZE)) break;
    }
    chunk_data->offset_ = chunk_start_offset;
    chunk_data->size_ = chunk_size;
@@ -939,13 +939,7 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
    LOG_WARN(error_message);
  } else {
    if (!webp_info->quiet_) {
-      char tag[4];
-      uint32_t fourcc = kWebPChunkTags[chunk_data->id_];
-#ifdef WORDS_BIGENDIAN
-      fourcc = (fourcc >> 24) | ((fourcc >> 8) & 0xff00) |
-               ((fourcc << 8) & 0xff0000) | (fourcc << 24);
-#endif
-      memcpy(tag, &fourcc, sizeof(tag));
+      const char* tag = kWebPChunkTags[chunk_data->id_];
      printf("Chunk %c%c%c%c at offset %6d, length %6d\n",
             tag[0], tag[1], tag[2], tag[3], (int)chunk_data->offset_,
             (int)chunk_data->size_);
--- a/examples/webpmux.c
+++ b/examples/webpmux.c
@@ -329,7 +329,7 @@ static void PrintHelp(void) {
  printf("\n");
  printf("DURATION_OPTIONS:\n");
  printf(" Set duration of selected frames:\n");
-  printf("   duration            set duration for all frames\n");
+  printf("   duration            set duration for each frame\n");
  printf("   duration,frame      set duration of a particular frame\n");
  printf("   duration,start,end  set duration of frames in the\n");
  printf("                        interval [start,end])\n");
@@ -348,7 +348,7 @@ static void PrintHelp(void) {
  printf("\n");
  printf("FRAME_OPTIONS(i):\n");
  printf(" Create animation:\n");
-  printf("   file_i +di[+xi+yi[+mi[bi]]]\n");
+  printf("   file_i +di+[xi+yi[+mi[bi]]]\n");
  printf("   where:    'file_i' is the i'th animation frame (WebP format),\n");
  printf("             'di' is the pause duration before next frame,\n");
  printf("             'xi','yi' specify the image offset for this frame,\n");
@@ -460,8 +460,7 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
    if (err == WEBP_MUX_OK && metadata.size > 0) {
      err = WebPMuxSetChunk(new_mux, kFourccList[i], &metadata, 1);
      if (err != WEBP_MUX_OK) {
-        ERROR_GOTO1("Error transferring metadata in DuplicateMuxHeader().",
-                    End);
+        ERROR_GOTO1("Error transferring metadata in DuplicateMux().", End);
      }
    }
  }
@@ -685,7 +684,7 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
          ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
        }
        arg->subtype_ = SUBTYPE_ANMF;
-        arg->filename_ = wargv[i + 1];
+        arg->filename_ = argv[i + 1];
        arg->params_ = argv[i + 2];
        ++feature_arg_index;
        i += 3;
--- a/extras/extras.c
+++ b/extras/extras.c
@@ -19,7 +19,7 @@

 #define XTRA_MAJ_VERSION 1
 #define XTRA_MIN_VERSION 2
-#define XTRA_REV_VERSION 3
+#define XTRA_REV_VERSION 1

 //------------------------------------------------------------------------------

--- a/extras/get_disto.c
+++ b/extras/get_disto.c
@@ -223,8 +223,7 @@ static void Help(void) {
          "  -o <file> . save the diff map as a WebP lossless file\n"
          "  -scale .... scale the difference map to fit [0..255] range\n"
          "  -gray ..... use grayscale for difference map (-scale)\n"
-          "\nSupported input formats:\n  %s\n",
-          WebPGetEnabledInputFileFormats());
+          " Also handles PNG, JPG and TIFF files, in addition to WebP.\n");
 }

 int main(int argc, const char* argv[]) {
--- a/imageio/image_dec.c
+++ b/imageio/image_dec.c
@@ -11,24 +11,6 @@

 #include "./image_dec.h"

-const char* WebPGetEnabledInputFileFormats(void) {
-  return "WebP"
-#ifdef WEBP_HAVE_JPEG
-         ", JPEG"
-#endif
-#ifdef WEBP_HAVE_PNG
-         ", PNG"
-#endif
-         ", PNM (PGM, PPM, PAM)"
-#ifdef WEBP_HAVE_TIFF
-         ", TIFF"
-#endif
-#ifdef HAVE_WINCODEC_H
-         ", Windows Imaging Component (WIC)"
-#endif
-         "";
-}
-
 static WEBP_INLINE uint32_t GetBE32(const uint8_t buf[]) {
  return ((uint32_t)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
 }
--- a/imageio/image_dec.h
+++ b/imageio/image_dec.h
@@ -41,9 +41,6 @@ typedef enum {
  WEBP_UNSUPPORTED_FORMAT
 } WebPInputFileFormat;

-// Returns a comma separated list of enabled input formats.
-const char* WebPGetEnabledInputFileFormats(void);
-
 // Try to infer the image format. 'data_size' should be larger than 12.
 // Returns WEBP_UNSUPPORTED_FORMAT if format can't be guess safely.
 WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
--- a/imageio/image_enc.c
+++ b/imageio/image_enc.c
@@ -280,7 +280,7 @@ int WebPWrite16bAsPGM(FILE* fout, const WebPDecBuffer* const buffer) {
 }

 //------------------------------------------------------------------------------
-// BMP (see https://en.wikipedia.org/wiki/BMP_file_format#Pixel_storage)
+// BMP

 static void PutLE16(uint8_t* const dst, uint32_t value) {
  dst[0] = (value >> 0) & 0xff;
@@ -293,11 +293,8 @@ static void PutLE32(uint8_t* const dst, uint32_t value) {
 }

 #define BMP_HEADER_SIZE 54
-#define BMP_HEADER_ALPHA_EXTRA_SIZE 16  // for alpha info
 int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
-  const int header_size =
-      BMP_HEADER_SIZE + (has_alpha ? BMP_HEADER_ALPHA_EXTRA_SIZE : 0);
  const uint32_t width = buffer->width;
  const uint32_t height = buffer->height;
  const uint8_t* rgba = buffer->u.RGBA.rgba;
@@ -306,9 +303,8 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  uint32_t y;
  const uint32_t line_size = bytes_per_px * width;
  const uint32_t bmp_stride = (line_size + 3) & ~3;   // pad to 4
-  const uint32_t image_size = bmp_stride * height;
-  const uint32_t total_size =  image_size + header_size;
-  uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = { 0 };
+  const uint32_t total_size = bmp_stride * height + BMP_HEADER_SIZE;
+  uint8_t bmp_header[BMP_HEADER_SIZE] = { 0 };

  if (fout == NULL || buffer == NULL || rgba == NULL) return 0;

@@ -316,37 +312,30 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
  PutLE16(bmp_header + 0, 0x4d42);                // signature 'BM'
  PutLE32(bmp_header + 2, total_size);            // size including header
  PutLE32(bmp_header + 6, 0);                     // reserved
-  PutLE32(bmp_header + 10, header_size);          // offset to pixel array
+  PutLE32(bmp_header + 10, BMP_HEADER_SIZE);      // offset to pixel array
  // bitmap info header
-  PutLE32(bmp_header + 14, header_size - 14);     // DIB header size
+  PutLE32(bmp_header + 14, 40);                   // DIB header size
  PutLE32(bmp_header + 18, width);                // dimensions
-  PutLE32(bmp_header + 22, height);               // no vertical flip
+  PutLE32(bmp_header + 22, -(int)height);         // vertical flip!
  PutLE16(bmp_header + 26, 1);                    // number of planes
  PutLE16(bmp_header + 28, bytes_per_px * 8);     // bits per pixel
-  PutLE32(bmp_header + 30, has_alpha ? 3 : 0);    // BI_BITFIELDS or BI_RGB
-  PutLE32(bmp_header + 34, image_size);
+  PutLE32(bmp_header + 30, 0);                    // no compression (BI_RGB)
+  PutLE32(bmp_header + 34, 0);                    // image size (placeholder)
  PutLE32(bmp_header + 38, 2400);                 // x pixels/meter
  PutLE32(bmp_header + 42, 2400);                 // y pixels/meter
  PutLE32(bmp_header + 46, 0);                    // number of palette colors
  PutLE32(bmp_header + 50, 0);                    // important color count
-  if (has_alpha) {  // BITMAPV3INFOHEADER complement
-    PutLE32(bmp_header + 54, 0x00ff0000);         // red mask
-    PutLE32(bmp_header + 58, 0x0000ff00);         // green mask
-    PutLE32(bmp_header + 62, 0x000000ff);         // blue mask
-    PutLE32(bmp_header + 66, 0xff000000);         // alpha mask
-  }

  // TODO(skal): color profile

  // write header
-  if (fwrite(bmp_header, header_size, 1, fout) != 1) {
+  if (fwrite(bmp_header, sizeof(bmp_header), 1, fout) != 1) {
    return 0;
  }

-  // write pixel array, bottom to top
+  // write pixel array
  for (y = 0; y < height; ++y) {
-    const uint8_t* const src = &rgba[(uint64_t)(height - 1 - y) * stride];
-    if (fwrite(src, line_size, 1, fout) != 1) {
+    if (fwrite(rgba, line_size, 1, fout) != 1) {
      return 0;
    }
    // write padding zeroes
@@ -356,11 +345,11 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
        return 0;
      }
    }
+    rgba += stride;
  }
  return 1;
 }
 #undef BMP_HEADER_SIZE
-#undef BMP_HEADER_ALPHA_EXTRA_SIZE

 //------------------------------------------------------------------------------
 // TIFF
--- a/imageio/image_enc.h
+++ b/imageio/image_enc.h
@@ -79,7 +79,7 @@ int WebPWriteTIFF(FILE* fout, const struct WebPDecBuffer* const buffer);
 int WebPWriteAlphaPlane(FILE* fout, const struct WebPDecBuffer* const buffer);

 // Save as YUV samples as PGM format (using IMC4 layout).
-// See: https://www.fourcc.org/yuv.php#IMC4.
+// See: http://www.fourcc.org/yuv.php#IMC4.
 // (very convenient format for viewing the samples, esp. for odd dimensions).
 int WebPWritePGM(FILE* fout, const struct WebPDecBuffer* const buffer);

--- a/imageio/jpegdec.c
+++ b/imageio/jpegdec.c
@@ -336,11 +336,7 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
  pic->width = width;
  pic->height = height;
  ok = WebPPictureImportRGB(pic, rgb, (int)stride);
-  if (!ok) {
-    pic->width = 0;   // WebPPictureImportRGB() barely touches 'pic' on failure.
-    pic->height = 0;  // Just reset dimensions but keep any 'custom_ptr' etc.
-    MetadataFree(metadata);  // In case the caller forgets to free it on error.
-  }
+  if (!ok) goto Error;

 End:
  free(rgb);
--- a/imageio/pngdec.c
+++ b/imageio/pngdec.c
@@ -133,7 +133,7 @@ static const struct {
                 MetadataPayload* const payload);
  size_t storage_offset;
 } kPNGMetadataMap[] = {
-  // https://exiftool.org/TagNames/PNG.html#TextualData
+  // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
  // See also: ExifTool on CPAN.
  { "Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif) },
  { "Raw profile type xmp",  ProcessRawProfile, METADATA_OFFSET(xmp) },
--- a/imageio/tiffdec.c
+++ b/imageio/tiffdec.c
@@ -188,9 +188,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
    fprintf(stderr, "Error! Cannot retrieve TIFF samples-per-pixel info.\n");
    goto End;
  }
-  if (!(samples_per_px == 1 || samples_per_px == 3 || samples_per_px == 4)) {
-    goto End;  // not supported
-  }
+  if (samples_per_px < 3 || samples_per_px > 4) goto End;  // not supported

  if (!(TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &image_width) &&
        TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &image_height))) {
@@ -214,7 +212,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
      TIFFGetField(tif, TIFFTAG_TILELENGTH, &tile_height)) {
    if ((tile_width > 32 && tile_width / 2 > image_width) ||
        (tile_height > 32 && tile_height / 2 > image_height) ||
-        !ImgIoUtilCheckSizeArgumentsOverflow(
+        !ImgIoUtilCheckSizeArgumentsOverflow(
            (uint64_t)tile_width * sizeof(*raster), tile_height)) {
      fprintf(stderr, "Error! TIFF tile dimension (%d x %d) is too large.\n",
              tile_width, tile_height);
--- a/infra/run_static_analysis.sh
+++ b/infra/run_static_analysis.sh
@@ -85,10 +85,10 @@ scan_build make -j4

 index="$(find "${OUTPUT_DIR}" -name index.html)"
 if [[ -f "${index}" ]]; then
-  mv "$(dirname "${index}")/"* "${OUTPUT_DIR}"
+  mv "$(dirname "${index}")/"* .
 else
  # make a empty report to wipe out any old bug reports.
-  cat << EOT > "${OUTPUT_DIR}/index.html"
+  cat << EOT > index.html
 <html>
 <body>
 No bugs reported.
--- a/iosbuild.sh
+++ b/iosbuild.sh
@@ -9,7 +9,7 @@
 # (the previous build will be erased if it exists).
 #
 # This script is inspired by the build script written by Carson McDonald.
-# (https://www.ioncannon.net/programming/1483/using-webp-to-reduce-native-ios-app-size/).
+# (http://www.ioncannon.net/programming/1483/using-webp-to-reduce-native-ios-app-size/).

 set -e

@@ -86,7 +86,7 @@ if [[ ! -e ${SRCDIR}/configure ]]; then
 Error creating configure script!
 This script requires the autoconf/automake and libtool to build. MacPorts can
 be used to obtain these:
-https://www.macports.org/install.php
+http://www.macports.org/install.php
 EOF
    exit 1
  fi
@@ -133,9 +133,10 @@ for PLATFORM in ${PLATFORMS}; do
    CFLAGS="${CFLAGS}"
  set +x

-  # Build only the libraries, skip the examples.
-  make V=0 -C sharpyuv
-  make V=0 -C src install
+  # run make only in the src/ directory to create libwebp.a/libwebpdecoder.a
+  cd src/
+  make V=0
+  make install

  LIBLIST+=" ${ROOTDIR}/lib/libwebp.a"
  DECLIBLIST+=" ${ROOTDIR}/lib/libwebpdecoder.a"
@@ -143,6 +144,7 @@ for PLATFORM in ${PLATFORMS}; do
  DEMUXLIBLIST+=" ${ROOTDIR}/lib/libwebpdemux.a"

  make clean
+  cd ..

  export PATH=${OLDPATH}
 done
--- a/makefile.unix
+++ b/makefile.unix
@@ -47,7 +47,7 @@ else
 endif

 # To install libraries on Mac OS X:
-# 1. Install MacPorts (https://www.macports.org/install.php)
+# 1. Install MacPorts (http://www.macports.org/install.php)
 # 2. Run "sudo port install jpeg"
 # 3. Run "sudo port install libpng"
 # 4. Run "sudo port install tiff"
@@ -125,14 +125,6 @@ endif
 ANIM_UTIL_OBJS = \
    examples/anim_util.o \

-SHARPYUV_OBJS = \
-    sharpyuv/sharpyuv.o \
-    sharpyuv/sharpyuv_csp.o \
-    sharpyuv/sharpyuv_dsp.o \
-    sharpyuv/sharpyuv_gamma.o \
-    sharpyuv/sharpyuv_neon.o \
-    sharpyuv/sharpyuv_sse2.o \
-
 DEC_OBJS = \
    src/dec/alpha_dec.o \
    src/dec/buffer_dec.o \
@@ -290,8 +282,8 @@ EXTRA_OBJS = \
    extras/quality_estimate.o \

 LIBWEBPDECODER_OBJS = $(DEC_OBJS) $(DSP_DEC_OBJS) $(UTILS_DEC_OBJS)
-LIBWEBP_OBJS = $(SHARPYUV_OBJS) $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) \
-               $(DSP_ENC_OBJS) $(UTILS_ENC_OBJS)
+LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) $(DSP_ENC_OBJS) \
+               $(UTILS_ENC_OBJS)
 LIBWEBPMUX_OBJS = $(MUX_OBJS)
 LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS)
 LIBWEBPEXTRA_OBJS = $(EXTRA_OBJS)
@@ -312,7 +304,6 @@ HDRS = \
    src/dec/vp8li_dec.h \
    src/dec/webpi_dec.h \
    src/dsp/common_sse2.h \
-    src/dsp/cpu.h \
    src/dsp/dsp.h \
    src/dsp/lossless.h \
    src/dsp/lossless_common.h \
@@ -498,7 +489,6 @@ clean:
              examples/*.o examples/*~ \
              extras/*.o extras/*~ \
              imageio/*.o imageio/*~ \
-              sharpyuv/*.o sharpyuv/*~ \
              src/dec/*.o src/dec/*~ \
              src/demux/*.o src/demux/*~ \
              src/dsp/*.o src/dsp/*~ \
--- a/man/cwebp.1
+++ b/man/cwebp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH CWEBP 1 "March 17, 2022"
+.TH CWEBP 1 "November 19, 2020"
 .SH NAME
 cwebp \- compress an image file to a WebP file
 .SH SYNOPSIS
@@ -90,17 +90,15 @@ additional encoding possibilities and decide on the quality gain.
 Lower value can result in faster processing time at the expense of
 larger file size and lower compression quality.
 .TP
+.BI \-resize " width height
+Resize the source to a rectangle with size \fBwidth\fP x \fBheight\fP.
+If either (but not both) of the \fBwidth\fP or \fBheight\fP parameters is 0,
+the value will be calculated preserving the aspect\-ratio.
+.TP
 .BI \-crop " x_position y_position width height
 Crop the source to a rectangle with top\-left corner at coordinates
 (\fBx_position\fP, \fBy_position\fP) and size \fBwidth\fP x \fBheight\fP.
 This cropping area must be fully contained within the source rectangle.
-Note: the cropping is applied \fIbefore\fP any scaling.
-.TP
-.BI \-resize " width height
-Resize the source to a rectangle with size \fBwidth\fP x \fBheight\fP.
-If either (but not both) of the \fBwidth\fP or \fBheight\fP parameters is 0,
-the value will be calculated preserving the aspect\-ratio. Note: scaling
-is applied \fIafter\fP cropping.
 .TP
 .B \-mt
 Use multi\-threading for encoding, if possible.
@@ -224,7 +222,7 @@ Compute and report average PSNR (Peak\-Signal\-To\-Noise ratio).
 .TP
 .B \-print_ssim
 Compute and report average SSIM (structural similarity
-metric, see https://en.wikipedia.org/wiki/SSIM for additional details).
+metric, see http://en.wikipedia.org/wiki/SSIM for additional details).
 .TP
 .B \-print_lsim
 Compute and report local similarity metric (sum of lowest error amongst the
@@ -300,7 +298,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 cwebp \-q 50 -lossless picture.png \-o picture_lossless.webp
@@ -324,5 +322,5 @@ for the Debian project (and may be used by others).
 .BR dwebp (1),
 .BR gif2webp (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/man/dwebp.1
+++ b/man/dwebp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH DWEBP 1 "November 17, 2021"
+.TH DWEBP 1 "November 19, 2020"
 .SH NAME
 dwebp \- decompress a WebP file to an image file
 .SH SYNOPSIS
@@ -113,7 +113,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 dwebp picture.webp \-o output.png
@@ -138,7 +138,7 @@ for the Debian project (and may be used by others).
 .BR gif2webp (1),
 .BR webpmux (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
 .SS Output file format details
 PAM: http://netpbm.sourceforge.net/doc/pam.html
--- a/man/gif2webp.1
+++ b/man/gif2webp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH GIF2WEBP 1 "November 17, 2021"
+.TH GIF2WEBP 1 "May 1, 2020"
 .SH NAME
 gif2webp \- Convert a GIF image to WebP
 .SH SYNOPSIS
@@ -131,7 +131,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 gif2webp picture.gif \-o picture.webp
@@ -160,5 +160,5 @@ Debian project (and may be used by others).
 .BR dwebp (1),
 .BR webpmux (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/man/img2webp.1
+++ b/man/img2webp.1
@@ -1,10 +1,10 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH IMG2WEBP 1 "January 5, 2022"
+.TH IMG2WEBP 1 "May 1, 2020"
 .SH NAME
 img2webp \- create animated WebP file from a sequence of input images.
 .SH SYNOPSIS
 .B img2webp
-[file_options] [[frame_options] frame_file]...
+[file_level_options] [files] [per_frame_options...]
 .br
 .B img2webp argument_file_name
 .br
@@ -86,7 +86,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH AUTHORS
 \fBimg2webp\fP is a part of libwebp and was written by the WebP team.
@@ -101,5 +101,5 @@ for the Debian project (and may be used by others).
 .BR webpmux (1),
 .BR gif2webp (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/man/vwebp.1
+++ b/man/vwebp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH VWEBP 1 "November 17, 2021"
+.TH VWEBP 1 "June 5, 2019"
 .SH NAME
 vwebp \- decompress a WebP file and display it in a window
 .SH SYNOPSIS
@@ -77,7 +77,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 vwebp picture.webp
@@ -97,5 +97,5 @@ This manual page was written for the Debian project (and may be used by others).
 .SH SEE ALSO
 .BR dwebp (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/man/webpinfo.1
+++ b/man/webpinfo.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH WEBPINFO 1 "November 17, 2021"
+.TH WEBPINFO 1 "November 24, 2017"
 .SH NAME
 webpinfo \- print out the chunk level structure of WebP files
 along with basic integrity checks.
@@ -52,7 +52,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 .br
@@ -76,5 +76,5 @@ for the Debian project (and may be used by others).
 .SH SEE ALSO
 .BR webpmux (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/man/webpmux.1
+++ b/man/webpmux.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH WEBPMUX 1 "November 17, 2021"
+.TH WEBPMUX 1 "November 3, 2021"
 .SH NAME
 webpmux \- create animated WebP files from non\-animated WebP images, extract
 frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
@@ -191,7 +191,7 @@ Please report all bugs to the issue tracker:
 https://bugs.chromium.org/p/webp
 .br
 Patches welcome! See this page to get started:
-https://www.webmproject.org/code/contribute/submitting\-patches/
+http://www.webmproject.org/code/contribute/submitting\-patches/

 .SH EXAMPLES
 .P
@@ -267,5 +267,5 @@ for the Debian project (and may be used by others).
 .BR dwebp (1),
 .BR gif2webp (1)
 .br
-Please refer to https://developers.google.com/speed/webp/ for additional
+Please refer to http://developers.google.com/speed/webp/ for additional
 information.
--- a/sharpyuv/Makefile.am
+++ b/sharpyuv/Makefile.am
@@ -1,34 +0,0 @@
-AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
-AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
-noinst_LTLIBRARIES =
-noinst_LTLIBRARIES += libsharpyuv.la
-noinst_LTLIBRARIES += libsharpyuv_sse2.la
-noinst_LTLIBRARIES += libsharpyuv_neon.la
-
-noinst_HEADERS =
-noinst_HEADERS += ../src/webp/types.h
-noinst_HEADERS += ../src/dsp/cpu.h
-
-libsharpyuv_sse2_la_SOURCES =
-libsharpyuv_sse2_la_SOURCES += sharpyuv_sse2.c
-libsharpyuv_sse2_la_CPPFLAGS = $(libsharpyuv_la_CPPFLAGS)
-libsharpyuv_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
-
-libsharpyuv_neon_la_SOURCES =
-libsharpyuv_neon_la_SOURCES += sharpyuv_neon.c
-libsharpyuv_neon_la_CPPFLAGS = $(libsharpyuv_la_CPPFLAGS)
-libsharpyuv_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_FLAGS)
-
-libsharpyuv_la_SOURCES =
-libsharpyuv_la_SOURCES += sharpyuv_csp.c sharpyuv_csp.h
-libsharpyuv_la_SOURCES += sharpyuv_dsp.c sharpyuv_dsp.h
-libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
-libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
-
-libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
-libsharpyuv_la_LDFLAGS =
-libsharpyuv_la_LIBADD =
-libsharpyuv_la_LIBADD += libsharpyuv_sse2.la
-libsharpyuv_la_LIBADD += libsharpyuv_neon.la
-
-noinst_PROGRAMS =
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -1,498 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Sharp RGB to YUV conversion.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "sharpyuv/sharpyuv.h"
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "src/webp/types.h"
-#include "src/dsp/cpu.h"
-#include "sharpyuv/sharpyuv_dsp.h"
-#include "sharpyuv/sharpyuv_gamma.h"
-
-//------------------------------------------------------------------------------
-// Sharp RGB->YUV conversion
-
-static const int kNumIterations = 4;
-
-#define YUV_FIX 16  // fixed-point precision for RGB->YUV
-static const int kYuvHalf = 1 << (YUV_FIX - 1);
-
-// Max bit depth so that intermediate calculations fit in 16 bits.
-static const int kMaxBitDepth = 14;
-
-// Returns the precision shift to use based on the input rgb_bit_depth.
-static int GetPrecisionShift(int rgb_bit_depth) {
-  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
-  // bits if needed.
-  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
-                                               : (kMaxBitDepth - rgb_bit_depth);
-}
-
-typedef int16_t fixed_t;      // signed type with extra precision for UV
-typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
-
-//------------------------------------------------------------------------------
-
-static uint8_t clip_8b(fixed_t v) {
-  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
-}
-
-static uint16_t clip(fixed_t v, int max) {
-  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-}
-
-static fixed_y_t clip_bit_depth(int y, int bit_depth) {
-  const int max = (1 << bit_depth) - 1;
-  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
-}
-
-//------------------------------------------------------------------------------
-
-static int RGBToGray(int64_t r, int64_t g, int64_t b) {
-  const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
-  return (int)(luma >> YUV_FIX);
-}
-
-static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
-                          int rgb_bit_depth) {
-  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
-  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
-  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
-  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
-  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
-}
-
-static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
-                                int rgb_bit_depth) {
-  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  int i;
-  for (i = 0; i < w; ++i) {
-    const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
-    const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
-    const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
-    const uint32_t Y = RGBToGray(R, G, B);
-    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
-  }
-}
-
-static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
-                         fixed_t* dst, int uv_w, int rgb_bit_depth) {
-  int i;
-  for (i = 0; i < uv_w; ++i) {
-    const int r =
-        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
-                  src2[0 * uv_w + 1], rgb_bit_depth);
-    const int g =
-        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
-                  src2[2 * uv_w + 1], rgb_bit_depth);
-    const int b =
-        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
-                  src2[4 * uv_w + 1], rgb_bit_depth);
-    const int W = RGBToGray(r, g, b);
-    dst[0 * uv_w] = (fixed_t)(r - W);
-    dst[1 * uv_w] = (fixed_t)(g - W);
-    dst[2 * uv_w] = (fixed_t)(b - W);
-    dst  += 1;
-    src1 += 2;
-    src2 += 2;
-  }
-}
-
-static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
-  int i;
-  assert(w > 0);
-  for (i = 0; i < w; ++i) {
-    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
-  }
-}
-
-//------------------------------------------------------------------------------
-
-static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
-  const int v0 = (A * 3 + B + 2) >> 2;
-  return clip_bit_depth(v0 + W0, bit_depth);
-}
-
-//------------------------------------------------------------------------------
-
-static WEBP_INLINE int Shift(int v, int shift) {
-  return (shift >= 0) ? (v << shift) : (v >> -shift);
-}
-
-static void ImportOneRow(const uint8_t* const r_ptr,
-                         const uint8_t* const g_ptr,
-                         const uint8_t* const b_ptr,
-                         int rgb_step,
-                         int rgb_bit_depth,
-                         int pic_width,
-                         fixed_y_t* const dst) {
-  // Convert the rgb_step from a number of bytes to a number of uint8_t or
-  // uint16_t values depending the bit depth.
-  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
-  int i;
-  const int w = (pic_width + 1) & ~1;
-  for (i = 0; i < pic_width; ++i) {
-    const int off = i * step;
-    const int shift = GetPrecisionShift(rgb_bit_depth);
-    if (rgb_bit_depth == 8) {
-      dst[i + 0 * w] = Shift(r_ptr[off], shift);
-      dst[i + 1 * w] = Shift(g_ptr[off], shift);
-      dst[i + 2 * w] = Shift(b_ptr[off], shift);
-    } else {
-      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
-      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
-      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
-    }
-  }
-  if (pic_width & 1) {  // replicate rightmost pixel
-    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
-    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
-    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
-  }
-}
-
-static void InterpolateTwoRows(const fixed_y_t* const best_y,
-                               const fixed_t* prev_uv,
-                               const fixed_t* cur_uv,
-                               const fixed_t* next_uv,
-                               int w,
-                               fixed_y_t* out1,
-                               fixed_y_t* out2,
-                               int rgb_bit_depth) {
-  const int uv_w = w >> 1;
-  const int len = (w - 1) >> 1;   // length to filter
-  int k = 3;
-  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  while (k-- > 0) {   // process each R/G/B segments in turn
-    // special boundary case for i==0
-    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
-    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
-
-    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
-                      bit_depth);
-    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
-                      bit_depth);
-
-    // special boundary case for i == w - 1 when w is even
-    if (!(w & 1)) {
-      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
-                            best_y[w - 1 + 0], bit_depth);
-      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
-                            best_y[w - 1 + w], bit_depth);
-    }
-    out1 += w;
-    out2 += w;
-    prev_uv += uv_w;
-    cur_uv  += uv_w;
-    next_uv += uv_w;
-  }
-}
-
-static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
-                                         const int coeffs[4], int sfix) {
-  const int srounder = 1 << (YUV_FIX + sfix - 1);
-  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
-                   coeffs[3] + srounder;
-  return (luma >> (YUV_FIX + sfix));
-}
-
-static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
-                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
-                            int u_stride, uint8_t* v_ptr, int v_stride,
-                            int rgb_bit_depth,
-                            int yuv_bit_depth, int width, int height,
-                            const SharpYuvConversionMatrix* yuv_matrix) {
-  int i, j;
-  const fixed_t* const best_uv_base = best_uv;
-  const int w = (width + 1) & ~1;
-  const int h = (height + 1) & ~1;
-  const int uv_w = w >> 1;
-  const int uv_h = h >> 1;
-  const int sfix = GetPrecisionShift(rgb_bit_depth);
-  const int yuv_max = (1 << yuv_bit_depth) - 1;
-
-  for (best_uv = best_uv_base, j = 0; j < height; ++j) {
-    for (i = 0; i < width; ++i) {
-      const int off = (i >> 1);
-      const int W = best_y[i];
-      const int r = best_uv[off + 0 * uv_w] + W;
-      const int g = best_uv[off + 1 * uv_w] + W;
-      const int b = best_uv[off + 2 * uv_w] + W;
-      const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
-      if (yuv_bit_depth <= 8) {
-        y_ptr[i] = clip_8b(y);
-      } else {
-        ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
-      }
-    }
-    best_y += w;
-    best_uv += (j & 1) * 3 * uv_w;
-    y_ptr += y_stride;
-  }
-  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
-    for (i = 0; i < uv_w; ++i) {
-      const int off = i;
-      // Note r, g and b values here are off by W, but a constant offset on all
-      // 3 components doesn't change the value of u and v with a YCbCr matrix.
-      const int r = best_uv[off + 0 * uv_w];
-      const int g = best_uv[off + 1 * uv_w];
-      const int b = best_uv[off + 2 * uv_w];
-      const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
-      const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
-      if (yuv_bit_depth <= 8) {
-        u_ptr[i] = clip_8b(u);
-        v_ptr[i] = clip_8b(v);
-      } else {
-        ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
-        ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
-      }
-    }
-    best_uv += 3 * uv_w;
-    u_ptr += u_stride;
-    v_ptr += v_stride;
-  }
-  return 1;
-}
-
-//------------------------------------------------------------------------------
-// Main function
-
-static void* SafeMalloc(uint64_t nmemb, size_t size) {
-  const uint64_t total_size = nmemb * (uint64_t)size;
-  if (total_size != (size_t)total_size) return NULL;
-  return malloc((size_t)total_size);
-}
-
-#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
-
-static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
-                            const uint8_t* b_ptr, int rgb_step, int rgb_stride,
-                            int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
-                            uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
-                            int v_stride, int yuv_bit_depth, int width,
-                            int height,
-                            const SharpYuvConversionMatrix* yuv_matrix) {
-  // we expand the right/bottom border if needed
-  const int w = (width + 1) & ~1;
-  const int h = (height + 1) & ~1;
-  const int uv_w = w >> 1;
-  const int uv_h = h >> 1;
-  uint64_t prev_diff_y_sum = ~0;
-  int j, iter;
-
-  // TODO(skal): allocate one big memory chunk. But for now, it's easier
-  // for valgrind debugging to have several chunks.
-  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
-  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
-  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
-  fixed_y_t* best_y = best_y_base;
-  fixed_y_t* target_y = target_y_base;
-  fixed_t* best_uv = best_uv_base;
-  fixed_t* target_uv = target_uv_base;
-  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
-  int ok;
-  assert(w > 0);
-  assert(h > 0);
-
-  if (best_y_base == NULL || best_uv_base == NULL ||
-      target_y_base == NULL || target_uv_base == NULL ||
-      best_rgb_y == NULL || best_rgb_uv == NULL ||
-      tmp_buffer == NULL) {
-    ok = 0;
-    goto End;
-  }
-
-  // Import RGB samples to W/RGB representation.
-  for (j = 0; j < height; j += 2) {
-    const int is_last_row = (j == height - 1);
-    fixed_y_t* const src1 = tmp_buffer + 0 * w;
-    fixed_y_t* const src2 = tmp_buffer + 3 * w;
-
-    // prepare two rows of input
-    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
-                 src1);
-    if (!is_last_row) {
-      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
-                   rgb_step, rgb_bit_depth, width, src2);
-    } else {
-      memcpy(src2, src1, 3 * w * sizeof(*src2));
-    }
-    StoreGray(src1, best_y + 0, w);
-    StoreGray(src2, best_y + w, w);
-
-    UpdateW(src1, target_y, w, rgb_bit_depth);
-    UpdateW(src2, target_y + w, w, rgb_bit_depth);
-    UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
-    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
-    best_y += 2 * w;
-    best_uv += 3 * uv_w;
-    target_y += 2 * w;
-    target_uv += 3 * uv_w;
-    r_ptr += 2 * rgb_stride;
-    g_ptr += 2 * rgb_stride;
-    b_ptr += 2 * rgb_stride;
-  }
-
-  // Iterate and resolve clipping conflicts.
-  for (iter = 0; iter < kNumIterations; ++iter) {
-    const fixed_t* cur_uv = best_uv_base;
-    const fixed_t* prev_uv = best_uv_base;
-    uint64_t diff_y_sum = 0;
-
-    best_y = best_y_base;
-    best_uv = best_uv_base;
-    target_y = target_y_base;
-    target_uv = target_uv_base;
-    for (j = 0; j < h; j += 2) {
-      fixed_y_t* const src1 = tmp_buffer + 0 * w;
-      fixed_y_t* const src2 = tmp_buffer + 3 * w;
-      {
-        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
-        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
-                           src1, src2, rgb_bit_depth);
-        prev_uv = cur_uv;
-        cur_uv = next_uv;
-      }
-
-      UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
-      UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
-      UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);
-
-      // update two rows of Y and one row of RGB
-      diff_y_sum +=
-          SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
-                          rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
-      SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
-
-      best_y += 2 * w;
-      best_uv += 3 * uv_w;
-      target_y += 2 * w;
-      target_uv += 3 * uv_w;
-    }
-    // test exit condition
-    if (iter > 0) {
-      if (diff_y_sum < diff_y_threshold) break;
-      if (diff_y_sum > prev_diff_y_sum) break;
-    }
-    prev_diff_y_sum = diff_y_sum;
-  }
-
-  // final reconstruction
-  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
-                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
-                        width, height, yuv_matrix);
-
- End:
-  free(best_y_base);
-  free(best_uv_base);
-  free(target_y_base);
-  free(target_uv_base);
-  free(best_rgb_y);
-  free(best_rgb_uv);
-  free(tmp_buffer);
-  return ok;
-}
-#undef SAFE_ALLOC
-
-// Hidden exported init function.
-// By default SharpYuvConvert calls it with NULL. If needed, users can declare
-// it as extern and call it with a VP8CPUInfo function.
-extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
-void SharpYuvInit(VP8CPUInfo cpu_info_func) {
-  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
-      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
-  const int initialized =
-      (sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used);
-  if (cpu_info_func == NULL && initialized) return;
-  if (sharpyuv_last_cpuinfo_used == cpu_info_func) return;
-
-  SharpYuvInitDsp(cpu_info_func);
-  if (!initialized) {
-    SharpYuvInitGammaTables();
-  }
-
-  sharpyuv_last_cpuinfo_used = cpu_info_func;
-}
-
-int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
-                    const void* b_ptr, int rgb_step, int rgb_stride,
-                    int rgb_bit_depth, void* y_ptr, int y_stride,
-                    void* u_ptr, int u_stride, void* v_ptr,
-                    int v_stride, int yuv_bit_depth, int width,
-                    int height, const SharpYuvConversionMatrix* yuv_matrix) {
-  SharpYuvConversionMatrix scaled_matrix;
-  const int rgb_max = (1 << rgb_bit_depth) - 1;
-  const int rgb_round = 1 << (rgb_bit_depth - 1);
-  const int yuv_max = (1 << yuv_bit_depth) - 1;
-  const int sfix = GetPrecisionShift(rgb_bit_depth);
-
-  if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
-      r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
-      u_ptr == NULL || v_ptr == NULL) {
-    return 0;
-  }
-  if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
-      rgb_bit_depth != 16) {
-    return 0;
-  }
-  if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
-    return 0;
-  }
-  if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
-    // Step/stride should be even for uint16_t buffers.
-    return 0;
-  }
-  if (yuv_bit_depth > 8 &&
-      (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
-    // Stride should be even for uint16_t buffers.
-    return 0;
-  }
-  SharpYuvInit(NULL);
-
-  // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
-  // rgb->yuv conversion matrix.
-  if (rgb_bit_depth == yuv_bit_depth) {
-    memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
-  } else {
-    int i;
-    for (i = 0; i < 3; ++i) {
-      scaled_matrix.rgb_to_y[i] =
-          (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
-      scaled_matrix.rgb_to_u[i] =
-          (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
-      scaled_matrix.rgb_to_v[i] =
-          (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
-    }
-  }
-  // Also incorporate precision change scaling.
-  scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
-  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
-  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
-
-  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
-                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
-                          v_ptr, v_stride, yuv_bit_depth, width, height,
-                          &scaled_matrix);
-}
-
-//------------------------------------------------------------------------------
--- a/sharpyuv/sharpyuv.h
+++ b/sharpyuv/sharpyuv.h
@@ -1,81 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Sharp RGB to YUV conversion.
-
-#ifndef WEBP_SHARPYUV_SHARPYUV_H_
-#define WEBP_SHARPYUV_SHARPYUV_H_
-
-#include <inttypes.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// SharpYUV API version following the convention from semver.org
-#define SHARPYUV_VERSION_MAJOR 0
-#define SHARPYUV_VERSION_MINOR 1
-#define SHARPYUV_VERSION_PATCH 0
-// Version as a uint32_t. The major number is the high 8 bits.
-// The minor number is the middle 8 bits. The patch number is the low 16 bits.
-#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
-  (((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
-#define SHARPYUV_VERSION                                                \
-  SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
-                        SHARPYUV_VERSION_PATCH)
-
-// RGB to YUV conversion matrix, in 16 bit fixed point.
-// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
-// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
-// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
-// Then y, u and v values are divided by 1<<16 and rounded.
-typedef struct {
-  int rgb_to_y[4];
-  int rgb_to_u[4];
-  int rgb_to_v[4];
-} SharpYuvConversionMatrix;
-
-// Converts RGB to YUV420 using a downsampling algorithm that minimizes
-// artefacts caused by chroma subsampling.
-// This is slower than standard downsampling (averaging of 4 UV values).
-// Assumes that the image will be upsampled using a bilinear filter. If nearest
-// neighbor is used instead, the upsampled image might look worse than with
-// standard downsampling.
-// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
-//     to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
-// rgb_step: distance in bytes between two horizontally adjacent pixels on the
-//     r, g and b channels. If rgb_bit_depth is > 8, it should be a
-//     multiple of 2.
-// rgb_stride: distance in bytes between two vertically adjacent pixels on the
-//     r, g, and b channels. If rgb_bit_depth is > 8, it should be a
-//     multiple of 2.
-// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
-//     Note: 16 bit input is truncated to 14 bits before conversion to yuv.
-// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
-// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels.  Should
-//     point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
-//     otherwise.
-// y_stride, u_stride, v_stride: distance in bytes between two vertically
-//     adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
-//     should be multiples of 2.
-// width, height: width and height of the image in pixels
-int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
-                    int rgb_step, int rgb_stride, int rgb_bit_depth,
-                    void* y_ptr, int y_stride, void* u_ptr, int u_stride,
-                    void* v_ptr, int v_stride, int yuv_bit_depth, int width,
-                    int height, const SharpYuvConversionMatrix* yuv_matrix);
-
-// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
-// support (it's rarely used in practice, especially for images).
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // WEBP_SHARPYUV_SHARPYUV_H_
--- a/sharpyuv/sharpyuv_csp.c
+++ b/sharpyuv/sharpyuv_csp.c
@@ -1,110 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Colorspace utilities.
-
-#include "sharpyuv/sharpyuv_csp.h"
-
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
-
-void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
-                                     SharpYuvConversionMatrix* matrix) {
-  const float kr = yuv_color_space->kr;
-  const float kb = yuv_color_space->kb;
-  const float kg = 1.0f - kr - kb;
-  const float cr = 0.5f / (1.0f - kb);
-  const float cb = 0.5f / (1.0f - kr);
-
-  const int shift = yuv_color_space->bit_depth - 8;
-
-  const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
-  float scale_y = 1.0f;
-  float add_y = 0.0f;
-  float scale_u = cr;
-  float scale_v = cb;
-  float add_uv = (float)(128 << shift);
-  assert(yuv_color_space->bit_depth >= 8);
-
-  if (yuv_color_space->range == kSharpYuvRangeLimited) {
-    scale_y *= (219 << shift) / denom;
-    scale_u *= (224 << shift) / denom;
-    scale_v *= (224 << shift) / denom;
-    add_y = (float)(16 << shift);
-  }
-
-  matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
-  matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
-  matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
-  matrix->rgb_to_y[3] = ToFixed16(add_y);
-
-  matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
-  matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
-  matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
-  matrix->rgb_to_u[3] = ToFixed16(add_uv);
-
-  matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
-  matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
-  matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
-  matrix->rgb_to_v[3] = ToFixed16(add_uv);
-}
-
-// Matrices are in YUV_FIX fixed point precision.
-// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
-static const SharpYuvConversionMatrix kWebpMatrix = {
-  {16839, 33059, 6420, 16 << 16},
-  {-9719, -19081, 28800, 128 << 16},
-  {28800, -24116, -4684, 128 << 16},
-};
-// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
-static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
-  {16829, 33039, 6416, 16 << 16},
-  {-9714, -19071, 28784, 128 << 16},
-  {28784, -24103, -4681, 128 << 16},
-};
-// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
-static const SharpYuvConversionMatrix kRec601FullMatrix = {
-  {19595, 38470, 7471, 0},
-  {-11058, -21710, 32768, 128 << 16},
-  {32768, -27439, -5329, 128 << 16},
-};
-// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
-static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
-  {11966, 40254, 4064, 16 << 16},
-  {-6596, -22189, 28784, 128 << 16},
-  {28784, -26145, -2639, 128 << 16},
-};
-// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
-static const SharpYuvConversionMatrix kRec709FullMatrix = {
-  {13933, 46871, 4732, 0},
-  {-7509, -25259, 32768, 128 << 16},
-  {32768, -29763, -3005, 128 << 16},
-};
-
-const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
-    SharpYuvMatrixType matrix_type) {
-  switch (matrix_type) {
-    case kSharpYuvMatrixWebp:
-      return &kWebpMatrix;
-    case kSharpYuvMatrixRec601Limited:
-      return &kRec601LimitedMatrix;
-    case kSharpYuvMatrixRec601Full:
-      return &kRec601FullMatrix;
-    case kSharpYuvMatrixRec709Limited:
-      return &kRec709LimitedMatrix;
-    case kSharpYuvMatrixRec709Full:
-      return &kRec709FullMatrix;
-    case kSharpYuvMatrixNum:
-      return NULL;
-  }
-  return NULL;
-}
--- a/sharpyuv/sharpyuv_csp.h
+++ b/sharpyuv/sharpyuv_csp.h
@@ -1,59 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Colorspace utilities.
-
-#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
-#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
-
-#include "sharpyuv/sharpyuv.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Range of YUV values.
-typedef enum {
-  kSharpYuvRangeFull,     // YUV values between [0;255] (for 8 bit)
-  kSharpYuvRangeLimited   // Y in [16;235], YUV in [16;240] (for 8 bit)
-} SharpYuvRange;
-
-// Constants that define a YUV color space.
-typedef struct {
-  // Kr and Kb are defined such that:
-  // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
-  float kr;
-  float kb;
-  int bit_depth;  // 8, 10 or 12
-  SharpYuvRange range;
-} SharpYuvColorSpace;
-
-// Fills in 'matrix' for the given YUVColorSpace.
-void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
-                                     SharpYuvConversionMatrix* matrix);
-
-// Enums for precomputed conversion matrices.
-typedef enum {
-  kSharpYuvMatrixWebp = 0,
-  kSharpYuvMatrixRec601Limited,
-  kSharpYuvMatrixRec601Full,
-  kSharpYuvMatrixRec709Limited,
-  kSharpYuvMatrixRec709Full,
-  kSharpYuvMatrixNum
-} SharpYuvMatrixType;
-
-// Returns a pointer to a matrix for one of the predefined colorspaces.
-const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
-    SharpYuvMatrixType matrix_type);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // WEBP_SHARPYUV_SHARPYUV_CSP_H_
--- a/sharpyuv/sharpyuv_dsp.c
+++ b/sharpyuv/sharpyuv_dsp.c
@@ -1,102 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Speed-critical functions for Sharp YUV.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "sharpyuv/sharpyuv_dsp.h"
-
-#include <assert.h>
-#include <stdlib.h>
-
-#include "src/dsp/cpu.h"
-
-//-----------------------------------------------------------------------------
-
-#if !WEBP_NEON_OMIT_C_CODE
-static uint16_t clip(int v, int max) {
-  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-}
-
-static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
-                                  uint16_t* dst, int len, int bit_depth) {
-  uint64_t diff = 0;
-  int i;
-  const int max_y = (1 << bit_depth) - 1;
-  for (i = 0; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip(new_y, max_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
-                                int16_t* dst, int len) {
-  int i;
-  for (i = 0; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
-                                const uint16_t* best_y, uint16_t* out,
-                                int bit_depth) {
-  int i;
-  const int max_y = (1 << bit_depth) - 1;
-  for (i = 0; i < len; ++i, ++A, ++B) {
-    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
-    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
-    out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
-    out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
-  }
-}
-#endif  // !WEBP_NEON_OMIT_C_CODE
-
-//-----------------------------------------------------------------------------
-
-uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
-                            uint16_t* dst, int len, int bit_depth);
-void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
-                          int len);
-void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
-                          const uint16_t* best_y, uint16_t* out,
-                          int bit_depth);
-
-extern void InitSharpYuvSSE2(void);
-extern void InitSharpYuvNEON(void);
-
-void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) {
-  (void)cpu_info_func;
-
-#if !WEBP_NEON_OMIT_C_CODE
-  SharpYuvUpdateY = SharpYuvUpdateY_C;
-  SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
-  SharpYuvFilterRow = SharpYuvFilterRow_C;
-#endif
-
-#if defined(WEBP_HAVE_SSE2)
-  if (cpu_info_func == NULL || cpu_info_func(kSSE2)) {
-    InitSharpYuvSSE2();
-  }
-#endif  // WEBP_HAVE_SSE2
-
-#if defined(WEBP_HAVE_NEON)
-  if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) {
-    InitSharpYuvNEON();
-  }
-#endif  // WEBP_HAVE_NEON
-
-  assert(SharpYuvUpdateY != NULL);
-  assert(SharpYuvUpdateRGB != NULL);
-  assert(SharpYuvFilterRow != NULL);
-}
--- a/sharpyuv/sharpyuv_dsp.h
+++ b/sharpyuv/sharpyuv_dsp.h
@@ -1,29 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Speed-critical functions for Sharp YUV.
-
-#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
-#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
-
-#include <stdint.h>
-
-#include "src/dsp/cpu.h"
-
-extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
-                                   uint16_t* dst, int len, int bit_depth);
-extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
-                                 int16_t* dst, int len);
-extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
-                                 const uint16_t* best_y, uint16_t* out,
-                                 int bit_depth);
-
-void SharpYuvInitDsp(VP8CPUInfo cpu_info_func);
-
-#endif  // WEBP_SHARPYUV_SHARPYUV_DSP_H_
--- a/sharpyuv/sharpyuv_gamma.c
+++ b/sharpyuv/sharpyuv_gamma.c
@@ -1,114 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Gamma correction utilities.
-
-#include "sharpyuv/sharpyuv_gamma.h"
-
-#include <assert.h>
-#include <math.h>
-#include <stdint.h>
-
-#include "src/webp/types.h"
-
-// Gamma correction compensates loss of resolution during chroma subsampling.
-// Size of pre-computed table for converting from gamma to linear.
-#define GAMMA_TO_LINEAR_TAB_BITS 10
-#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
-static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
-#define LINEAR_TO_GAMMA_TAB_BITS 9
-#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
-static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
-
-static const double kGammaF = 1. / 0.45;
-#define GAMMA_TO_LINEAR_BITS 16
-
-static volatile int kGammaTablesSOk = 0;
-void SharpYuvInitGammaTables(void) {
-  assert(GAMMA_TO_LINEAR_BITS <= 16);
-  if (!kGammaTablesSOk) {
-    int v;
-    const double a = 0.09929682680944;
-    const double thresh = 0.018053968510807;
-    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
-    // Precompute gamma to linear table.
-    {
-      const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
-      const double a_rec = 1. / (1. + a);
-      for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
-        const double g = norm * v;
-        double value;
-        if (g <= thresh * 4.5) {
-          value = g / 4.5;
-        } else {
-          value = pow(a_rec * (g + a), kGammaF);
-        }
-        kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
-      }
-      // to prevent small rounding errors to cause read-overflow:
-      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
-          kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
-    }
-    // Precompute linear to gamma table.
-    {
-      const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
-      for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
-        const double g = scale * v;
-        double value;
-        if (g <= thresh) {
-          value = 4.5 * g;
-        } else {
-          value = (1. + a) * pow(g, 1. / kGammaF) - a;
-        }
-        kLinearToGammaTabS[v] =
-            (uint32_t)(final_scale * value + 0.5);
-      }
-      // to prevent small rounding errors to cause read-overflow:
-      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
-          kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
-    }
-    kGammaTablesSOk = 1;
-  }
-}
-
-static WEBP_INLINE int Shift(int v, int shift) {
-  return (shift >= 0) ? (v << shift) : (v >> -shift);
-}
-
-static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
-                                                    int tab_pos_shift_right,
-                                                    int tab_value_shift) {
-  const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
-  // fractional part, in 'tab_pos_shift' fixed-point precision
-  const uint32_t x = v - (tab_pos << tab_pos_shift_right);  // fractional part
-  // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
-  const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
-  const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
-  // Final interpolation.
-  const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0.
-  const int half =
-      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
-  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
-  return result;
-}
-
-uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) {
-  const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
-  if (shift > 0) {
-    return kGammaToLinearTabS[v << shift];
-  }
-  return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
-}
-
-uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) {
-  return FixedPointInterpolation(
-      value, kLinearToGammaTabS,
-      (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
-      bit_depth - GAMMA_TO_LINEAR_BITS);
-}
--- a/sharpyuv/sharpyuv_gamma.h
+++ b/sharpyuv/sharpyuv_gamma.h
@@ -1,35 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Gamma correction utilities.
-
-#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
-#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Initializes precomputed tables. Must be called once before calling
-// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
-void SharpYuvInitGammaTables(void);
-
-// Converts a gamma color value on 'bit_depth' bits to a 16 bit linear value.
-uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth);
-
-// Converts a 16 bit linear color value to a gamma value on 'bit_depth' bits.
-uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
--- a/sharpyuv/sharpyuv_neon.c
+++ b/sharpyuv/sharpyuv_neon.c
@@ -1,182 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Speed-critical functions for Sharp YUV.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "sharpyuv/sharpyuv_dsp.h"
-
-#if defined(WEBP_USE_NEON)
-#include <assert.h>
-#include <stdlib.h>
-#include <arm_neon.h>
-#endif
-
-extern void InitSharpYuvNEON(void);
-
-#if defined(WEBP_USE_NEON)
-
-static uint16_t clip_NEON(int v, int max) {
-  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-}
-
-static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  int i;
-  const int16x8_t zero = vdupq_n_s16(0);
-  const int16x8_t max = vdupq_n_s16(max_y);
-  uint64x2_t sum = vdupq_n_u64(0);
-  uint64_t diff;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
-    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
-    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
-    const int16x8_t D = vsubq_s16(A, B);       // diff_y
-    const int16x8_t F = vaddq_s16(C, D);       // new_y
-    const uint16x8_t H =
-        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
-    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
-    vst1q_u16(dst + i, H);
-    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
-  }
-  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)(dst[i]) + diff_y;
-    dst[i] = clip_NEON(new_y, max_y);
-    diff += (uint64_t)(abs(diff_y));
-  }
-  return diff;
-}
-
-static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vld1q_s16(ref + i);
-    const int16x8_t B = vld1q_s16(src + i);
-    const int16x8_t C = vld1q_s16(dst + i);
-    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
-    const int16x8_t E = vaddq_s16(C, D);   // new_uv
-    vst1q_s16(dst + i, E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
-                                     int len, const uint16_t* best_y,
-                                     uint16_t* out, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  int i;
-  const int16x8_t max = vdupq_n_s16(max_y);
-  const int16x8_t zero = vdupq_n_s16(0);
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t a0 = vld1q_s16(A + i + 0);
-    const int16x8_t a1 = vld1q_s16(A + i + 1);
-    const int16x8_t b0 = vld1q_s16(B + i + 0);
-    const int16x8_t b1 = vld1q_s16(B + i + 1);
-    const int16x8_t a0b1 = vaddq_s16(a0, b1);
-    const int16x8_t a1b0 = vaddq_s16(a1, b0);
-    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
-    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
-    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
-    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
-    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
-    const int16x8_t e0 = vrhaddq_s16(c1, a0);
-    const int16x8_t e1 = vrhaddq_s16(c0, a1);
-    const int16x8x2_t f = vzipq_s16(e0, e1);
-    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
-    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
-    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
-    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
-    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
-    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
-    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
-    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
-  }
-  for (; i < len; ++i) {
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
-    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
-  }
-}
-
-static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
-                                     int len, const uint16_t* best_y,
-                                     uint16_t* out, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  int i;
-  const uint16x8_t max = vdupq_n_u16(max_y);
-  for (i = 0; i + 4 <= len; i += 4) {
-    const int16x4_t a0 = vld1_s16(A + i + 0);
-    const int16x4_t a1 = vld1_s16(A + i + 1);
-    const int16x4_t b0 = vld1_s16(B + i + 0);
-    const int16x4_t b1 = vld1_s16(B + i + 1);
-    const int32x4_t a0b1 = vaddl_s16(a0, b1);
-    const int32x4_t a1b0 = vaddl_s16(a1, b0);
-    const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0);  // A0+A1+B0+B1
-    const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1);    // 2*(A0+B1)
-    const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0);    // 2*(A1+B0)
-    const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3);
-    const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3);
-    const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0));
-    const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1));
-    const int32x4x2_t f = vzipq_s32(e0, e1);
-
-    const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i));
-    const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g));
-    const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g));
-    const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1));
-    const uint16x8_t i_clamped = vminq_u16(i_16, max);
-    vst1q_u16(out + 2 * i + 0, i_clamped);
-  }
-  for (; i < len; ++i) {
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
-    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
-  }
-}
-
-static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out,
-                                   int bit_depth) {
-  if (bit_depth <= 10) {
-    SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
-  } else {
-    SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
-  }
-}
-
-//------------------------------------------------------------------------------
-
-WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
-  SharpYuvUpdateY = SharpYuvUpdateY_NEON;
-  SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
-  SharpYuvFilterRow = SharpYuvFilterRow_NEON;
-}
-
-#else  // !WEBP_USE_NEON
-
-void InitSharpYuvNEON(void) {}
-
-#endif  // WEBP_USE_NEON
--- a/sharpyuv/sharpyuv_sse2.c
+++ b/sharpyuv/sharpyuv_sse2.c
@@ -1,204 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Speed-critical functions for Sharp YUV.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "sharpyuv/sharpyuv_dsp.h"
-
-#if defined(WEBP_USE_SSE2)
-#include <stdlib.h>
-#include <emmintrin.h>
-#endif
-
-extern void InitSharpYuvSSE2(void);
-
-#if defined(WEBP_USE_SSE2)
-
-static uint16_t clip_SSE2(int v, int max) {
-  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-}
-
-static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  uint64_t diff = 0;
-  uint32_t tmp[4];
-  int i;
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i max = _mm_set1_epi16(max_y);
-  const __m128i one = _mm_set1_epi16(1);
-  __m128i sum = zero;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
-    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
-    const __m128i F = _mm_add_epi16(C, D);       // new_y
-    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
-    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
-    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
-    _mm_storeu_si128((__m128i*)(dst + i), H);
-    sum = _mm_add_epi32(sum, I);
-  }
-  _mm_storeu_si128((__m128i*)tmp, sum);
-  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip_SSE2(new_y, max_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i = 0;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
-    const __m128i E = _mm_add_epi16(C, D);   // new_uv
-    _mm_storeu_si128((__m128i*)(dst + i), E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
-                                     int len, const uint16_t* best_y,
-                                     uint16_t* out, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  int i;
-  const __m128i kCst8 = _mm_set1_epi16(8);
-  const __m128i max = _mm_set1_epi16(max_y);
-  const __m128i zero = _mm_setzero_si128();
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
-    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
-    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
-    const __m128i a0b1 = _mm_add_epi16(a0, b1);
-    const __m128i a1b0 = _mm_add_epi16(a1, b0);
-    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
-    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
-    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
-    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
-    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
-    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
-    const __m128i d0 = _mm_add_epi16(c1, a0);
-    const __m128i d1 = _mm_add_epi16(c0, a1);
-    const __m128i e0 = _mm_srai_epi16(d0, 1);
-    const __m128i e1 = _mm_srai_epi16(d1, 1);
-    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
-    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
-    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
-    const __m128i h0 = _mm_add_epi16(g0, f0);
-    const __m128i h1 = _mm_add_epi16(g1, f1);
-    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
-    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
-  }
-  for (; i < len; ++i) {
-    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-    // We reuse the common sub-expressions.
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
-    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
-  }
-}
-
-static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
-  return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16);
-}
-
-static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
-                                     int len, const uint16_t* best_y,
-                                     uint16_t* out, int bit_depth) {
-  const int max_y = (1 << bit_depth) - 1;
-  int i;
-  const __m128i kCst8 = _mm_set1_epi32(8);
-  const __m128i max = _mm_set1_epi16(max_y);
-  const __m128i zero = _mm_setzero_si128();
-  for (i = 0; i + 4 <= len; i += 4) {
-    const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
-    const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
-    const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
-    const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
-    const __m128i a0b1 = _mm_add_epi32(a0, b1);
-    const __m128i a1b0 = _mm_add_epi32(a1, b0);
-    const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0);  // A0+A1+B0+B1
-    const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
-    const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1);  // 2*(A0+B1)
-    const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0);  // 2*(A1+B0)
-    const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
-    const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
-    const __m128i d0 = _mm_add_epi32(c1, a0);
-    const __m128i d1 = _mm_add_epi32(c0, a1);
-    const __m128i e0 = _mm_srai_epi32(d0, 1);
-    const __m128i e1 = _mm_srai_epi32(d1, 1);
-    const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
-    const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
-    const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-    const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
-    const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
-  }
-  for (; i < len; ++i) {
-    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-    // We reuse the common sub-expressions.
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
-    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
-  }
-}
-
-static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out,
-                                   int bit_depth) {
-  if (bit_depth <= 10) {
-    SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
-  } else {
-    SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
-  }
-}
-
-//------------------------------------------------------------------------------
-
-extern void InitSharpYuvSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
-  SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
-  SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
-  SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
-}
-#else  // !WEBP_USE_SSE2
-
-void InitSharpYuvSSE2(void) {}
-
-#endif  // WEBP_USE_SSE2
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -36,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
 # other than the ones listed on the command line, i.e., after linking, it will
 # not have unresolved symbols. Some platforms (Windows among them) require all
 # symbols in shared libraries to be resolved at library creation.
-libwebp_la_LDFLAGS = -no-undefined -version-info 8:4:1
+libwebp_la_LDFLAGS = -no-undefined -version-info 8:2:1
 libwebpincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebp.pc

@@ -48,7 +48,7 @@ if BUILD_LIBWEBPDECODER
  libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
  libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la

-  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:4:1
+  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:2:1
  pkgconfig_DATA += libwebpdecoder.pc
 endif

--- a/src/dec/vp8_dec.c
+++ b/src/dec/vp8_dec.c
@@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
 };

-// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
+// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
  int v;
  if (!VP8GetBit(br, p[3], "coeffs")) {
--- a/src/dec/vp8i_dec.h
+++ b/src/dec/vp8i_dec.h
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 1
 #define DEC_MIN_VERSION 2
-#define DEC_REV_VERSION 3
+#define DEC_REV_VERSION 1

 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
--- a/src/dec/vp8l_dec.c
+++ b/src/dec/vp8l_dec.c
@@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // to 256 (green component values) + 24 (length prefix values)
 // + color_cache_size (between 0 and 2048).
 // All values computed for 8-bit first level lookup with Mark Adler's tool:
-// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
+// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
 static const uint16_t kTableSize[12] = {
  FIXED_TABLE_SIZE + 654,
@@ -178,7 +178,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {

 //------------------------------------------------------------------------------
 // Decodes the next Huffman code from bit-stream.
-// VP8LFillBitWindow(br) needs to be called at minimum every second call
+// FillBitWindow(br) needs to be called at minimum every second call
 // to ReadSymbol, in order to pre-fetch enough bits.
 static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
                                  VP8LBitReader* const br) {
@@ -321,7 +321,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
    // The first code is either 1 bit or 8 bit code.
    int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
    code_lengths[symbol] = 1;
-    // The second code (if present), is always 8 bits long.
+    // The second code (if present), is always 8 bit long.
    if (num_symbols == 2) {
      symbol = VP8LReadBits(br, 8);
      code_lengths[symbol] = 1;
@@ -1281,7 +1281,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
    uint8_t* const new_data = (uint8_t*)new_color_map;
    new_color_map[0] = transform->data_[0];
    for (i = 4; i < 4 * num_colors; ++i) {
-      // Equivalent to VP8LAddPixels(), on a byte-basis.
+      // Equivalent to AddPixelEq(), on a byte-basis.
      new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
    }
    for (; i < 4 * final_num_colors; ++i) {
--- a/src/demux/Makefile.am
+++ b/src/demux/Makefile.am
@@ -13,6 +13,6 @@ noinst_HEADERS =
 noinst_HEADERS += ../webp/format_constants.h

 libwebpdemux_la_LIBADD = ../libwebp.la
-libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:10:0
+libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:8:0
 libwebpdemuxincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebpdemux.pc
--- a/src/demux/anim_decode.c
+++ b/src/demux/anim_decode.c
@@ -23,14 +23,6 @@

 #define NUM_CHANNELS 4

-// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
-// buffer.
-#ifdef WORDS_BIGENDIAN
-#define CHANNEL_SHIFT(i) (24 - (i) * 8)
-#else
-#define CHANNEL_SHIFT(i) ((i) * 8)
-#endif
-
 typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
 static void BlendPixelRowNonPremult(uint32_t* const src,
                                    const uint32_t* const dst, int num_pixels);
@@ -217,35 +209,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
  const uint8_t dst_channel = (dst >> shift) & 0xff;
  const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
  assert(blend_unscaled < (1ULL << 32) / scale);
-  return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
+  return (blend_unscaled * scale) >> 24;
 }

 // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
 static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
-  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
+  const uint8_t src_a = (src >> 24) & 0xff;

  if (src_a == 0) {
    return dst;
  } else {
-    const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
+    const uint8_t dst_a = (dst >> 24) & 0xff;
    // This is the approximate integer arithmetic for the actual formula:
    // dst_factor_a = (dst_a * (255 - src_a)) / 255.
    const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
    const uint8_t blend_a = src_a + dst_factor_a;
    const uint32_t scale = (1UL << 24) / blend_a;

-    const uint8_t blend_r = BlendChannelNonPremult(
-        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
-    const uint8_t blend_g = BlendChannelNonPremult(
-        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
-    const uint8_t blend_b = BlendChannelNonPremult(
-        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
+    const uint8_t blend_r =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
+    const uint8_t blend_g =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
+    const uint8_t blend_b =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
    assert(src_a + dst_factor_a < 256);

-    return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
-           ((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
-           ((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
-           ((uint32_t)blend_a << CHANNEL_SHIFT(3));
+    return (blend_r << 0) |
+           (blend_g << 8) |
+           (blend_b << 16) |
+           ((uint32_t)blend_a << 24);
  }
 }

@@ -255,7 +247,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
                                    const uint32_t* const dst, int num_pixels) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
-    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
+    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
    if (src_alpha != 0xff) {
      src[i] = BlendPixelNonPremult(src[i], dst[i]);
    }
@@ -272,7 +264,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {

 // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
 static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
-  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
+  const uint8_t src_a = (src >> 24) & 0xff;
  return src + ChannelwiseMultiply(dst, 256 - src_a);
 }

@@ -282,7 +274,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
                                 int num_pixels) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
-    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
+    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
    if (src_alpha != 0xff) {
      src[i] = BlendPixelPremult(src[i], dst[i]);
    }
--- a/src/demux/demux.c
+++ b/src/demux/demux.c
@@ -25,7 +25,7 @@

 #define DMUX_MAJ_VERSION 1
 #define DMUX_MIN_VERSION 2
-#define DMUX_REV_VERSION 3
+#define DMUX_REV_VERSION 1

 typedef struct {
  size_t start_;        // start location of the data
@@ -614,6 +614,7 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {

  while (f != NULL) {
    const int cur_frame_set = f->frame_num_;
+    int frame_count = 0;

    // Check frame properties.
    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
@@ -648,6 +649,8 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
                            dmux->canvas_width_, dmux->canvas_height_)) {
        return 0;
      }
+
+      ++frame_count;
    }
  }
  return 1;
--- a/src/demux/libwebpdemux.rc
+++ b/src/demux/libwebpdemux.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,2,3
- PRODUCTVERSION 1,0,2,3
+ FILEVERSION 1,0,2,1
+ PRODUCTVERSION 1,0,2,1
 FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
 FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
        BEGIN
            VALUE "CompanyName", "Google, Inc."
            VALUE "FileDescription", "libwebpdemux DLL"
-            VALUE "FileVersion", "1.2.3"
+            VALUE "FileVersion", "1.2.1"
            VALUE "InternalName", "libwebpdemux.dll"
-            VALUE "LegalCopyright", "Copyright (C) 2022"
+            VALUE "LegalCopyright", "Copyright (C) 2021"
            VALUE "OriginalFilename", "libwebpdemux.dll"
            VALUE "ProductName", "WebP Image Demuxer"
-            VALUE "ProductVersion", "1.2.3"
+            VALUE "ProductVersion", "1.2.1"
        END
    END
    BLOCK "VarFileInfo"
--- a/src/dsp/Makefile.am
+++ b/src/dsp/Makefile.am
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES += libwebpdsp_mips32.la
 noinst_LTLIBRARIES += libwebpdspdecode_mips32.la
 noinst_LTLIBRARIES += libwebpdsp_mips_dsp_r2.la
 noinst_LTLIBRARIES += libwebpdspdecode_mips_dsp_r2.la
+noinst_LTLIBRARIES += libwebpdspdecode_wasm.la

 if BUILD_LIBWEBPDECODER
  noinst_LTLIBRARIES += libwebpdspdecode.la
@@ -24,7 +25,6 @@ commondir = $(includedir)/webp
 COMMON_SOURCES =
 COMMON_SOURCES += alpha_processing.c
 COMMON_SOURCES += cpu.c
-COMMON_SOURCES += cpu.h
 COMMON_SOURCES += dec.c
 COMMON_SOURCES += dec_clip_tables.c
 COMMON_SOURCES += dsp.h
@@ -107,6 +107,8 @@ libwebpdspdecode_mips_dsp_r2_la_SOURCES += yuv_mips_dsp_r2.c
 libwebpdspdecode_mips_dsp_r2_la_CPPFLAGS = $(libwebpdsp_mips_dsp_r2_la_CPPFLAGS)
 libwebpdspdecode_mips_dsp_r2_la_CFLAGS = $(libwebpdsp_mips_dsp_r2_la_CFLAGS)

+libwebpdspdecode_wasm_la_SOURCES = dec_wasm.c
+
 libwebpdsp_sse2_la_SOURCES =
 libwebpdsp_sse2_la_SOURCES += cost_sse2.c
 libwebpdsp_sse2_la_SOURCES += enc_sse2.c
--- a/src/dsp/alpha_processing_neon.c
+++ b/src/dsp/alpha_processing_neon.c
@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
 static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
                              int alpha_stride, int width, int height,
                              uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-  uint32_t alpha_mask = 0xffu;
+  uint32_t alpha_mask = 0xffffffffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@@ -107,7 +107,6 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
    dst += dst_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
-  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask != 0xffffffffu);
@@ -136,7 +135,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
 static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
                             int width, int height,
                             uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-  uint32_t alpha_mask = 0xffu;
+  uint32_t alpha_mask = 0xffffffffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@@ -158,7 +157,6 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
    alpha += alpha_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
-  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask == 0xffffffffu);
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -11,7 +11,7 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)

-#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"

 #if defined(WEBP_HAVE_NEON_RTCD)
 #include <stdio.h>
@@ -26,8 +26,10 @@
 // SSE2 detection.
 //

+// Skip SSE detection if using Wasm SIMD build.
+#if defined(WEBP_USE_WASM_SIMD)
 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
-#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
+#elif (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
@@ -69,8 +71,10 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {

 #endif

+// Skip xgetbv definition if using Wasm SIMD build.
+#if defined(WEBP_USE_WASM_SIMD)
 // NaCl has no support for xgetbv or the raw opcode.
-#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+#elif !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
 static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
@@ -100,7 +104,13 @@ static WEBP_INLINE uint64_t xgetbv(void) {
 #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
 #endif

-#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
+#if defined(WEBP_USE_WASM_SIMD)
+static int wasmCPUInfo(CPUFeature feature) {
+  if (feature != kWasmSIMD) return 0;
+  return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
+#elif defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)

 // helper function for run-time detection of slow SSSE3 platforms
 static int CheckSlowModel(int info) {
--- a/src/dsp/cpu.h
+++ b/src/dsp/cpu.h
@@ -1,254 +0,0 @@
-// Copyright 2022 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//   CPU detection functions and macros.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#ifndef WEBP_DSP_CPU_H_
-#define WEBP_DSP_CPU_H_
-
-#ifdef HAVE_CONFIG_H
-#include "src/webp/config.h"
-#endif
-
-#include "src/webp/types.h"
-
-#if defined(__GNUC__)
-#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-#else
-#define LOCAL_GCC_VERSION 0
-#define LOCAL_GCC_PREREQ(maj, min) 0
-#endif
-
-#if defined(__clang__)
-#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-#define LOCAL_CLANG_PREREQ(maj, min) \
-  (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-#define LOCAL_CLANG_VERSION 0
-#define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif
-
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#if !defined(HAVE_CONFIG_H)
-#if defined(_MSC_VER) && _MSC_VER > 1310 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
-#endif
-#endif
-
-// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
-// files without intrinsics, allowing the corresponding Init() to be called.
-// Files containing intrinsics will need to be built targeting the instruction
-// set so should succeed on one of the earlier tests.
-#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
-#define WEBP_USE_SSE2
-#endif
-
-#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
-#define WEBP_HAVE_SSE2
-#endif
-
-#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
-#define WEBP_USE_SSE41
-#endif
-
-#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
-#define WEBP_HAVE_SSE41
-#endif
-
-#undef WEBP_MSC_SSE41
-#undef WEBP_MSC_SSE2
-
-// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-// inline assembly would need to be modified for use with Native Client.
-#if ((defined(__ARM_NEON__) || defined(__aarch64__)) &&       \
-     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
-    !defined(__native_client__)
-#define WEBP_USE_NEON
-#endif
-
-#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
-    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-#define WEBP_ANDROID_NEON  // Android targets that may have NEON
-#define WEBP_USE_NEON
-#endif
-
-// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
-// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
-// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
-// vtbl4_u8(); a fix was made in 16.6.
-#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
-                          (_MSC_VER >= 1926 && defined(_M_ARM64)))
-#define WEBP_USE_NEON
-#define WEBP_USE_INTRINSICS
-#endif
-
-#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
-#define WEBP_HAVE_NEON
-#endif
-
-#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
-    (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
-#define WEBP_USE_MIPS32
-#if (__mips_isa_rev >= 2)
-#define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
-#define WEBP_USE_MIPS_DSP_R2
-#endif
-#endif
-#endif
-
-#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
-#define WEBP_USE_MSA
-#endif
-
-#ifndef WEBP_DSP_OMIT_C_CODE
-#define WEBP_DSP_OMIT_C_CODE 1
-#endif
-
-#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
-#define WEBP_NEON_OMIT_C_CODE 1
-#else
-#define WEBP_NEON_OMIT_C_CODE 0
-#endif
-
-#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
-      defined(__aarch64__))
-#define WEBP_NEON_WORK_AROUND_GCC 1
-#else
-#define WEBP_NEON_WORK_AROUND_GCC 0
-#endif
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#define WEBP_TSAN_IGNORE_FUNCTION
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#undef WEBP_TSAN_IGNORE_FUNCTION
-#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
-#endif
-#endif
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
-#define WEBP_MSAN
-#endif
-#endif
-
-#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-#include <pthread.h>  // NOLINT
-
-#define WEBP_DSP_INIT(func)                                         \
-  do {                                                              \
-    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
-        (VP8CPUInfo)&func##_last_cpuinfo_used;                      \
-    static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
-    if (pthread_mutex_lock(&func##_lock)) break;                    \
-    if (func##_last_cpuinfo_used != VP8GetCPUInfo) func();          \
-    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
-    (void)pthread_mutex_unlock(&func##_lock);                       \
-  } while (0)
-#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define WEBP_DSP_INIT(func)                               \
-  do {                                                    \
-    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
-        (VP8CPUInfo)&func##_last_cpuinfo_used;            \
-    if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
-    func();                                               \
-    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
-  } while (0)
-#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-
-// Defines an Init + helper function that control multiple initialization of
-// function pointers / tables.
-/* Usage:
-   WEBP_DSP_INIT_FUNC(InitFunc) {
-     ...function body
-   }
-*/
-#define WEBP_DSP_INIT_FUNC(name)                                            \
-  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void);                  \
-  WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
-  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
-
-#define WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures. This is only meant to silence unaligned loads on platforms that
-// are known to support them.
-#undef WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
-
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures related to unsigned integer overflows. This is only meant to
-// silence cases where this well defined behavior is expected.
-#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
-  __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
-// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
-#if !defined(WEBP_OFFSET_PTR)
-#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
-#endif
-
-// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-#if !defined(WEBP_SWAP_16BIT_CSP)
-#define WEBP_SWAP_16BIT_CSP 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) &&                   \
-    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
-     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
-typedef enum {
-  kSSE2,
-  kSSE3,
-  kSlowSSSE3,  // special feature for slow SSSE3 architectures
-  kSSE4_1,
-  kAVX,
-  kAVX2,
-  kNEON,
-  kMIPS32,
-  kMIPSdspR2,
-  kMSA
-} CPUFeature;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
-
-#ifdef __cplusplus
-}    // extern "C"
-#endif
-
-#endif  // WEBP_DSP_CPU_H_
--- a/src/dsp/dec.c
+++ b/src/dsp/dec.c
@@ -740,6 +740,7 @@ extern void VP8DspInitNEON(void);
 extern void VP8DspInitMIPS32(void);
 extern void VP8DspInitMIPSdspR2(void);
 extern void VP8DspInitMSA(void);
+extern void VP8DspInitWasmSIMD(void);

 WEBP_DSP_INIT_FUNC(VP8DspInit) {
  VP8InitClipTables();
@@ -831,6 +832,12 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
    if (VP8GetCPUInfo(kMSA)) {
      VP8DspInitMSA();
    }
+#endif
+#if defined(WEBP_USE_WASM_SIMD) && defined(WEBP_ENABLE_WASM_SIMD_INTRINSICS)
+    // Check that SIMD is supported and that we want to use Wasm intrinsics.
+    if (VP8GetCPUInfo(kWasmSIMD)) {
+      VP8DspInitWasmSIMD();
+    }
 #endif
  }

--- a/src/dsp/dec_wasm.c
+++ b/src/dsp/dec_wasm.c
@@ -0,0 +1,32 @@
+// Copyright 2021 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebAssembly (Wasm) version of some decoding functions.
+//
+// Placeholder for now: the Wasm SIMD implementations have not been added yet.
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_WASM_SIMD)
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitWasmSIMD(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitWasmSIMD(void) {
+  // TODO(crbug.com/v8/12371): intentionally empty. No Wasm SIMD kernels exist
+  // yet, so this init hook changes nothing; implementations come later.
+}
+
+#else
+
+WEBP_DSP_INIT_STUB(VP8DspInitWasmSIMD)
+
+#endif  // WEBP_USE_WASM_SIMD
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -18,7 +18,6 @@
 #include "src/webp/config.h"
 #endif

-#include "src/dsp/cpu.h"
 #include "src/webp/types.h"

 #ifdef __cplusplus
@@ -44,6 +43,231 @@ extern "C" {
 #define WEBP_RESTRICT
 #endif

+//------------------------------------------------------------------------------
+// CPU detection
+
+#if defined(__GNUC__)
+# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+# define LOCAL_GCC_PREREQ(maj, min) \
+    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_GCC_VERSION 0
+# define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#if defined(__clang__)
+# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+# define LOCAL_CLANG_PREREQ(maj, min) \
+    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_CLANG_VERSION 0
+# define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
+#endif
+#endif
+
+#if (defined(EMSCRIPTEN) || defined(WEBP_HAVE_WASM_SIMD)) && \
+    defined(__wasm_simd128__)
+#define WEBP_USE_WASM_SIMD
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
+#define WEBP_HAVE_SSE2
+#endif
+
+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
+#define WEBP_USE_SSE41
+#endif
+
+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
+#define WEBP_HAVE_SSE41
+#endif
+
+#undef WEBP_MSC_SSE41
+#undef WEBP_MSC_SSE2
+
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
+     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
+    !defined(__native_client__)
+#define WEBP_USE_NEON
+#endif
+
+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
+    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
+#define WEBP_ANDROID_NEON  // Android targets that may have NEON
+#define WEBP_USE_NEON
+#endif
+
+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
+// vtbl4_u8(); a fix was made in 16.6.
+#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
+                          (_MSC_VER >= 1926 && defined(_M_ARM64)))
+#define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS
+#endif
+
+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
+#define WEBP_HAVE_NEON
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && \
+    defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2
+#endif
+#endif
+#endif
+
+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
+#define WEBP_USE_MSA
+#endif
+
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
+#endif
+#endif
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+#include <pthread.h>  // NOLINT
+
+#define WEBP_DSP_INIT(func) do {                                    \
+  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
+      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
+  static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
+  if (pthread_mutex_lock(&func ## _lock)) break;                    \
+  if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func();          \
+  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
+  (void)pthread_mutex_unlock(&func ## _lock);                       \
+} while (0)
+#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+#define WEBP_DSP_INIT(func) do {                                    \
+  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
+      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
+  if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break;           \
+  func();                                                           \
+  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
+} while (0)
+#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
+
+// Defines an Init + helper function that control multiple initialization of
+// function pointers / tables.
+/* Usage:
+   WEBP_DSP_INIT_FUNC(InitFunc) {
+     ...function body
+   }
+*/
+#define WEBP_DSP_INIT_FUNC(name)                             \
+  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
+  WEBP_TSAN_IGNORE_FUNCTION void name(void) {                \
+    WEBP_DSP_INIT(name ## _body);                            \
+  }                                                          \
+  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
+
+#define WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures. This is only meant to silence unaligned loads on platforms that
+// are known to support them.
+#undef WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNDEF \
+  __attribute__((no_sanitize("undefined")))
+
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures related to unsigned integer overflows. This is only meant to
+// silence cases where this well defined behavior is expected.
+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
+  __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
+#if !defined(WEBP_OFFSET_PTR)
+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
+#endif
+
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && \
+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+typedef enum {
+  kSSE2,
+  kSSE3,
+  kSlowSSSE3,  // special feature for slow SSSE3 architectures
+  kSSE4_1,
+  kAVX,
+  kAVX2,
+  kNEON,
+  kMIPS32,
+  kMIPSdspR2,
+  kMSA,
+  kWasmSIMD
+} CPUFeature;
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;

 //------------------------------------------------------------------------------
 // Init stub generator
@@ -332,6 +556,15 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
 extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
                                    uint8_t* u, uint8_t* v, int width);

+// utilities for accurate RGB->YUV conversion
+extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
+                                       uint16_t* dst, int len);
+extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
+                                     int16_t* dst, int len);
+extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
+                                     int len,
+                                     const uint16_t* best_y, uint16_t* out);
+
 // Must be called before using the above.
 void WebPInitConvertARGBToYUV(void);

--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@@ -9,7 +9,7 @@
 //
 // ARM NEON version of speed-critical encoding functions.
 //
-// adapted from libvpx (https://www.webmproject.org/code/)
+// adapted from libvpx (http://www.webmproject.org/code/)

 #include "src/dsp/dsp.h"

--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -107,77 +107,63 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
 //------------------------------------------------------------------------------
 // Predictors

-uint32_t VP8LPredictor0_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
  (void)top;
  (void)left;
  return ARGB_BLACK;
 }
-uint32_t VP8LPredictor1_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
  (void)top;
-  return *left;
+  return left;
 }
-uint32_t VP8LPredictor2_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[0];
 }
-uint32_t VP8LPredictor3_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[1];
 }
-uint32_t VP8LPredictor4_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[-1];
 }
-uint32_t VP8LPredictor5_C(const uint32_t* const left,
-                          const uint32_t* const top) {
-  const uint32_t pred = Average3(*left, top[0], top[1]);
+uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average3(left, top[0], top[1]);
  return pred;
 }
-uint32_t VP8LPredictor6_C(const uint32_t* const left,
-                          const uint32_t* const top) {
-  const uint32_t pred = Average2(*left, top[-1]);
+uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor7_C(const uint32_t* const left,
-                          const uint32_t* const top) {
-  const uint32_t pred = Average2(*left, top[0]);
+uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[0]);
  return pred;
 }
-uint32_t VP8LPredictor8_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(top[-1], top[0]);
  (void)left;
  return pred;
 }
-uint32_t VP8LPredictor9_C(const uint32_t* const left,
-                          const uint32_t* const top) {
+uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(top[0], top[1]);
  (void)left;
  return pred;
 }
-uint32_t VP8LPredictor10_C(const uint32_t* const left,
-                           const uint32_t* const top) {
-  const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
+uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
  return pred;
 }
-uint32_t VP8LPredictor11_C(const uint32_t* const left,
-                           const uint32_t* const top) {
-  const uint32_t pred = Select(top[0], *left, top[-1]);
+uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Select(top[0], left, top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor12_C(const uint32_t* const left,
-                           const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
+uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor13_C(const uint32_t* const left,
-                           const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
+uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
  return pred;
 }

--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -28,38 +28,23 @@ extern "C" {
 //------------------------------------------------------------------------------
 // Decoding

-typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
-                                      const uint32_t* const top);
+typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
 extern VP8LPredictorFunc VP8LPredictors[16];

-uint32_t VP8LPredictor0_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor1_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor2_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor3_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor4_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor5_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor6_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor7_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor8_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor9_C(const uint32_t* const left,
-                          const uint32_t* const top);
-uint32_t VP8LPredictor10_C(const uint32_t* const left,
-                           const uint32_t* const top);
-uint32_t VP8LPredictor11_C(const uint32_t* const left,
-                           const uint32_t* const top);
-uint32_t VP8LPredictor12_C(const uint32_t* const left,
-                           const uint32_t* const top);
-uint32_t VP8LPredictor13_C(const uint32_t* const left,
-                           const uint32_t* const top);
+uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);

 // These Add/Sub function expects upper[-1] and out[-1] to be readable.
 typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
@@ -182,9 +167,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.

-typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
-                                      int length);
+typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                       int length);
 typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
                                                const int Y[256]);

@@ -198,7 +183,7 @@ typedef struct {        // small struct to hold counters
 } VP8LStreaks;

 typedef struct {            // small struct to hold bit entropy results
-  float entropy;            // entropy
+  double entropy;           // entropy
  uint32_t sum;             // sum of the population
  int nonzeros;             // number of non-zero elements in the population
  uint32_t max_val;         // maximum value in the population
--- a/src/dsp/lossless_common.h
+++ b/src/dsp/lossless_common.h
@@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
  int x;                                                             \
  assert(upper != NULL);                                             \
  for (x = 0; x < num_pixels; ++x) {                                 \
-    const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);       \
+    const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x);        \
    out[x] = VP8LAddPixels(in[x], pred);                             \
  }                                                                  \
 }
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
 static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
  int i;
-  float retval = 0.f;
+  double retval = 0.;
  int sumX = 0, sumXY = 0;
  for (i = 0; i < 256; ++i) {
    const int x = X[i];
@@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return retval;
+  return (float)retval;
 }

 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
@@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,

 //------------------------------------------------------------------------------

-static float ExtraCost_C(const uint32_t* population, int length) {
+static double ExtraCost_C(const uint32_t* population, int length) {
  int i;
-  float cost = 0.f;
+  double cost = 0.;
  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
  return cost;
 }

-static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
                                  int length) {
  int i;
-  float cost = 0.f;
+  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    const int xy = X[i + 2] + Y[i + 2];
    cost += (i >> 1) * xy;
@@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in,              \
  assert(upper != NULL);                                                   \
  for (x = 0; x < num_pixels; ++x) {                                       \
    const uint32_t pred =                                                  \
-        VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x);             \
+        VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x);              \
    out[x] = VP8LSubPixels(in[x], pred);                                   \
  }                                                                        \
 }
--- a/src/dsp/lossless_enc_mips32.c
+++ b/src/dsp/lossless_enc_mips32.c
@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
 //     cost += i * *(pop + 1);
 //     pop += 2;
 //   }
-//   return (float)cost;
-static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
+//   return (double)cost;
+static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
  int i, temp0, temp1;
  const uint32_t* pop = &population[4];
  const uint32_t* const LoopEnd = &population[length];
@@ -130,7 +130,7 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
    : "memory", "hi", "lo"
  );

-  return (float)((int64_t)temp0 << 32 | temp1);
+  return (double)((int64_t)temp0 << 32 | temp1);
 }

 // C version of this function:
@@ -148,9 +148,9 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
 //     pX += 2;
 //     pY += 2;
 //   }
-//   return (float)cost;
-static float ExtraCostCombined_MIPS32(const uint32_t* const X,
-                                      const uint32_t* const Y, int length) {
+//   return (double)cost;
+static double ExtraCostCombined_MIPS32(const uint32_t* const X,
+                                       const uint32_t* const Y, int length) {
  int i, temp0, temp1, temp2, temp3;
  const uint32_t* pX = &X[4];
  const uint32_t* pY = &Y[4];
@@ -183,7 +183,7 @@ static float ExtraCostCombined_MIPS32(const uint32_t* const X,
    : "memory", "hi", "lo"
  );

-  return (float)((int64_t)temp0 << 32 | temp1);
+  return (double)((int64_t)temp0 << 32 | temp1);
 }

 #define HUFFMAN_COST_PASS                                 \
@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
 static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
                             uint32_t* pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const int end = ((size) / 4) * 4;
+  const uint32_t end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
  ASM_END_0
-  for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
+  for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
 }

 static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const int end = ((size) / 4) * 4;
+  const uint32_t end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
  ASM_END_1
-  for (i = 0; i < size - end; ++i) pout[i] += pa[i];
+  for (i = end; i < size; ++i) pout[i] += pa[i];
 }

 #undef ASM_END_1
--- a/src/dsp/lossless_enc_sse2.c
+++ b/src/dsp/lossless_enc_sse2.c
@@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {

 static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
  int i;
-  float retval = 0.f;
+  double retval = 0.;
  int sumX = 0, sumXY = 0;
  const __m128i zero = _mm_setzero_si128();

@@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return retval;
+  return (float)retval;
 }

 #else
--- a/src/dsp/lossless_mips_dsp_r2.c
+++ b/src/dsp/lossless_mips_dsp_r2.c
@@ -188,51 +188,46 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
  return Average2(Average2(a0, a1), Average2(a2, a3));
 }

-static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
-                                     const uint32_t* const top) {
-  return Average3(*left, top[0], top[1]);
+static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
+  return Average3(left, top[0], top[1]);
 }

-static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
-                                     const uint32_t* const top) {
-  return Average2(*left, top[-1]);
+static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[-1]);
 }

-static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
-                                     const uint32_t* const top) {
-  return Average2(*left, top[0]);
+static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[0]);
 }

-static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
-                                     const uint32_t* const top) {
+static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
 }

-static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
-                                     const uint32_t* const top) {
+static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
 }

-static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
+static uint32_t Predictor10_MIPSdspR2(uint32_t left,
                                      const uint32_t* const top) {
-  return Average4(*left, top[-1], top[0], top[1]);
+  return Average4(left, top[-1], top[0], top[1]);
 }

-static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
+static uint32_t Predictor11_MIPSdspR2(uint32_t left,
                                      const uint32_t* const top) {
-  return Select(top[0], *left, top[-1]);
+  return Select(top[0], left, top[-1]);
 }

-static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
+static uint32_t Predictor12_MIPSdspR2(uint32_t left,
                                      const uint32_t* const top) {
-  return ClampedAddSubtractFull(*left, top[0], top[-1]);
+  return ClampedAddSubtractFull(left, top[0], top[-1]);
 }

-static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
+static uint32_t Predictor13_MIPSdspR2(uint32_t left,
                                      const uint32_t* const top) {
-  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
+  return ClampedAddSubtractHalf(left, top[0], top[-1]);
 }

 // Add green to blue and red channels (i.e. perform the inverse transform of
--- a/src/dsp/lossless_neon.c
+++ b/src/dsp/lossless_neon.c
@@ -188,21 +188,17 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
  return avg;
 }

-static uint32_t Predictor5_NEON(const uint32_t* const left,
-                                const uint32_t* const top) {
-  return Average3_NEON(*left, top[0], top[1]);
+static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
+  return Average3_NEON(left, top[0], top[1]);
 }
-static uint32_t Predictor6_NEON(const uint32_t* const left,
-                                const uint32_t* const top) {
-  return Average2_NEON(*left, top[-1]);
+static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
+  return Average2_NEON(left, top[-1]);
 }
-static uint32_t Predictor7_NEON(const uint32_t* const left,
-                                const uint32_t* const top) {
-  return Average2_NEON(*left, top[0]);
+static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
+  return Average2_NEON(left, top[0]);
 }
-static uint32_t Predictor13_NEON(const uint32_t* const left,
-                                 const uint32_t* const top) {
-  return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
+static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
 }

 // Batch versions of those functions.
--- a/src/dsp/lossless_sse2.c
+++ b/src/dsp/lossless_sse2.c
@@ -138,51 +138,42 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
  return output;
 }

-static uint32_t Predictor5_SSE2(const uint32_t* const left,
-                                const uint32_t* const top) {
-  const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
+static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
  return pred;
 }
-static uint32_t Predictor6_SSE2(const uint32_t* const left,
-                                const uint32_t* const top) {
-  const uint32_t pred = Average2_SSE2(*left, top[-1]);
+static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2_SSE2(left, top[-1]);
  return pred;
 }
-static uint32_t Predictor7_SSE2(const uint32_t* const left,
-                                const uint32_t* const top) {
-  const uint32_t pred = Average2_SSE2(*left, top[0]);
+static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2_SSE2(left, top[0]);
  return pred;
 }
-static uint32_t Predictor8_SSE2(const uint32_t* const left,
-                                const uint32_t* const top) {
+static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2_SSE2(top[-1], top[0]);
  (void)left;
  return pred;
 }
-static uint32_t Predictor9_SSE2(const uint32_t* const left,
-                                const uint32_t* const top) {
+static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2_SSE2(top[0], top[1]);
  (void)left;
  return pred;
 }
-static uint32_t Predictor10_SSE2(const uint32_t* const left,
-                                 const uint32_t* const top) {
-  const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
+static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
  return pred;
 }
-static uint32_t Predictor11_SSE2(const uint32_t* const left,
-                                 const uint32_t* const top) {
-  const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
+static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
  return pred;
 }
-static uint32_t Predictor12_SSE2(const uint32_t* const left,
-                                 const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
+static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
  return pred;
 }
-static uint32_t Predictor13_SSE2(const uint32_t* const left,
-                                 const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
+static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
  return pred;
 }

--- a/src/dsp/msa_macro.h
+++ b/src/dsp/msa_macro.h
@@ -14,10 +14,6 @@
 #ifndef WEBP_DSP_MSA_MACRO_H_
 #define WEBP_DSP_MSA_MACRO_H_

-#include "src/dsp/dsp.h"
-
-#if defined(WEBP_USE_MSA)
-
 #include <stdint.h>
 #include <msa.h>

@@ -1393,5 +1389,4 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
 } while (0)
 #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)

-#endif  // WEBP_USE_MSA
 #endif  // WEBP_DSP_MSA_MACRO_H_
--- a/src/dsp/neon.h
+++ b/src/dsp/neon.h
@@ -12,12 +12,10 @@
 #ifndef WEBP_DSP_NEON_H_
 #define WEBP_DSP_NEON_H_

-#include "src/dsp/dsp.h"
-
-#if defined(WEBP_USE_NEON)
-
 #include <arm_neon.h>

+#include "src/dsp/dsp.h"
+
 // Right now, some intrinsics functions seem slower, so we disable them
 // everywhere except newer clang/gcc or aarch64 where the inline assembly is
 // incompatible.
@@ -100,5 +98,4 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
 } while (0)
 #endif

-#endif  // WEBP_USE_NEON
 #endif  // WEBP_DSP_NEON_H_
--- a/src/dsp/yuv.c
+++ b/src/dsp/yuv.c
@@ -194,6 +194,50 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,

 //-----------------------------------------------------------------------------

+#if !WEBP_NEON_OMIT_C_CODE
+#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
+static uint16_t clip_y(int v) {
+  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+}
+
+static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
+                                  uint16_t* dst, int len) {
+  uint64_t diff = 0;
+  int i;
+  for (i = 0; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)dst[i] + diff_y;
+    dst[i] = clip_y(new_y);
+    diff += (uint64_t)abs(diff_y);
+  }
+  return diff;
+}
+
+static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
+                                int16_t* dst, int len) {
+  int i;
+  for (i = 0; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
+                                const uint16_t* best_y, uint16_t* out) {
+  int i;
+  for (i = 0; i < len; ++i, ++A, ++B) {
+    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
+    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
+    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
+    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
+  }
+}
+#endif  // !WEBP_NEON_OMIT_C_CODE
+
+#undef MAX_Y
+
+//-----------------------------------------------------------------------------
+
 void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
 void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
 void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
@@ -203,9 +247,18 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
 void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
                            int src_width, int do_store);

+uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
+                                uint16_t* dst, int len);
+void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
+                              int16_t* dst, int len);
+void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
+                              const uint16_t* best_y, uint16_t* out);
+
 extern void WebPInitConvertARGBToYUVSSE2(void);
 extern void WebPInitConvertARGBToYUVSSE41(void);
 extern void WebPInitConvertARGBToYUVNEON(void);
+extern void WebPInitSharpYUVSSE2(void);
+extern void WebPInitSharpYUVNEON(void);

 WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  WebPConvertARGBToY = ConvertARGBToY_C;
@@ -216,10 +269,17 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {

  WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;

+#if !WEBP_NEON_OMIT_C_CODE
+  WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
+  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
+  WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
+#endif
+
  if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_HAVE_SSE2)
    if (VP8GetCPUInfo(kSSE2)) {
      WebPInitConvertARGBToYUVSSE2();
+      WebPInitSharpYUVSSE2();
    }
 #endif  // WEBP_HAVE_SSE2
 #if defined(WEBP_HAVE_SSE41)
@@ -233,6 +293,7 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  if (WEBP_NEON_OMIT_C_CODE ||
      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
    WebPInitConvertARGBToYUVNEON();
+    WebPInitSharpYUVNEON();
  }
 #endif  // WEBP_HAVE_NEON

@@ -241,4 +302,7 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  assert(WebPConvertRGB24ToY != NULL);
  assert(WebPConvertBGR24ToY != NULL);
  assert(WebPConvertRGBA32ToUV != NULL);
+  assert(WebPSharpYUVUpdateY != NULL);
+  assert(WebPSharpYUVUpdateRGB != NULL);
+  assert(WebPSharpYUVFilterRow != NULL);
 }
--- a/src/dsp/yuv.h
+++ b/src/dsp/yuv.h
@@ -10,7 +10,7 @@
 // inline YUV<->RGB conversion function
 //
 // The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
-// More information at: https://en.wikipedia.org/wiki/YCbCr
+// More information at: http://en.wikipedia.org/wiki/YCbCr
 // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
--- a/src/dsp/yuv_neon.c
+++ b/src/dsp/yuv_neon.c
@@ -173,8 +173,116 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
 }

+//------------------------------------------------------------------------------
+
+#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
+static uint16_t clip_y_NEON(int v) {
+  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+}
+
+static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
+                                     uint16_t* dst, int len) {
+  int i;
+  const int16x8_t zero = vdupq_n_s16(0);
+  const int16x8_t max = vdupq_n_s16(MAX_Y);
+  uint64x2_t sum = vdupq_n_u64(0);
+  uint64_t diff;
+
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
+    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
+    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
+    const int16x8_t D = vsubq_s16(A, B);       // diff_y
+    const int16x8_t F = vaddq_s16(C, D);       // new_y
+    const uint16x8_t H =
+        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
+    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
+    vst1q_u16(dst + i, H);
+    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
+  }
+  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
+  for (; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)(dst[i]) + diff_y;
+    dst[i] = clip_y_NEON(new_y);
+    diff += (uint64_t)(abs(diff_y));
+  }
+  return diff;
+}
+
+static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
+                                   int16_t* dst, int len) {
+  int i;
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vld1q_s16(ref + i);
+    const int16x8_t B = vld1q_s16(src + i);
+    const int16x8_t C = vld1q_s16(dst + i);
+    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
+    const int16x8_t E = vaddq_s16(C, D);   // new_uv
+    vst1q_s16(dst + i, E);
+  }
+  for (; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
+                                   const uint16_t* best_y, uint16_t* out) {
+  int i;
+  const int16x8_t max = vdupq_n_s16(MAX_Y);
+  const int16x8_t zero = vdupq_n_s16(0);
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t a0 = vld1q_s16(A + i + 0);
+    const int16x8_t a1 = vld1q_s16(A + i + 1);
+    const int16x8_t b0 = vld1q_s16(B + i + 0);
+    const int16x8_t b1 = vld1q_s16(B + i + 1);
+    const int16x8_t a0b1 = vaddq_s16(a0, b1);
+    const int16x8_t a1b0 = vaddq_s16(a1, b0);
+    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
+    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
+    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
+    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
+    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
+    const int16x8_t d0 = vaddq_s16(c1, a0);
+    const int16x8_t d1 = vaddq_s16(c0, a1);
+    const int16x8_t e0 = vrshrq_n_s16(d0, 1);
+    const int16x8_t e1 = vrshrq_n_s16(d1, 1);
+    const int16x8x2_t f = vzipq_s16(e0, e1);
+    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
+    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
+    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
+    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
+    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
+    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
+    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
+    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
+  }
+  for (; i < len; ++i) {
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
+    out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
+  }
+}
+#undef MAX_Y
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitSharpYUVNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
+  WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
+  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
+  WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
+}
+
 #else  // !WEBP_USE_NEON

 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
+WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)

 #endif  // WEBP_USE_NEON
--- a/src/dsp/yuv_sse2.c
+++ b/src/dsp/yuv_sse2.c
@@ -747,9 +747,128 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
 }

+//------------------------------------------------------------------------------
+
+#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
+static uint16_t clip_y(int v) {
+  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+}
+
+static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
+                                     uint16_t* dst, int len) {
+  uint64_t diff = 0;
+  uint32_t tmp[4];
+  int i;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i max = _mm_set1_epi16(MAX_Y);
+  const __m128i one = _mm_set1_epi16(1);
+  __m128i sum = zero;
+
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
+    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
+    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
+    const __m128i F = _mm_add_epi16(C, D);       // new_y
+    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
+    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
+    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
+    _mm_storeu_si128((__m128i*)(dst + i), H);
+    sum = _mm_add_epi32(sum, I);
+  }
+  _mm_storeu_si128((__m128i*)tmp, sum);
+  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
+  for (; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)dst[i] + diff_y;
+    dst[i] = clip_y(new_y);
+    diff += (uint64_t)abs(diff_y);
+  }
+  return diff;
+}
+
+static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
+                                   int16_t* dst, int len) {
+  int i = 0;
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
+    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
+    const __m128i E = _mm_add_epi16(C, D);   // new_uv
+    _mm_storeu_si128((__m128i*)(dst + i), E);
+  }
+  for (; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
+                                   const uint16_t* best_y, uint16_t* out) {
+  int i;
+  const __m128i kCst8 = _mm_set1_epi16(8);
+  const __m128i max = _mm_set1_epi16(MAX_Y);
+  const __m128i zero = _mm_setzero_si128();
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
+    const __m128i a0b1 = _mm_add_epi16(a0, b1);
+    const __m128i a1b0 = _mm_add_epi16(a1, b0);
+    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
+    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
+    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
+    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
+    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
+    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
+    const __m128i d0 = _mm_add_epi16(c1, a0);
+    const __m128i d1 = _mm_add_epi16(c0, a1);
+    const __m128i e0 = _mm_srai_epi16(d0, 1);
+    const __m128i e1 = _mm_srai_epi16(d1, 1);
+    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
+    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
+    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
+    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
+    const __m128i h0 = _mm_add_epi16(g0, f0);
+    const __m128i h1 = _mm_add_epi16(g1, f1);
+    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
+    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
+    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
+    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
+  }
+  for (; i < len; ++i) {
+    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
+    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
+    // We reuse the common sub-expressions.
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
+    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
+  }
+}
+
+#undef MAX_Y
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitSharpYUVSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
+  WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
+  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
+  WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
+}
+
 #else  // !WEBP_USE_SSE2

 WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
+WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)

 #endif  // WEBP_USE_SSE2
--- a/src/enc/Makefile.am
+++ b/src/enc/Makefile.am
@@ -37,7 +37,6 @@ libwebpencodeinclude_HEADERS += ../webp/types.h
 noinst_HEADERS =
 noinst_HEADERS += ../webp/format_constants.h

-libwebpencode_la_LIBADD = ../../sharpyuv/libsharpyuv.la
 libwebpencode_la_LDFLAGS = -lm
 libwebpencode_la_CPPFLAGS = $(AM_CPPFLAGS)
 libwebpencodeincludedir = $(includedir)/webp
--- a/src/enc/alpha_enc.c
+++ b/src/enc/alpha_enc.c
@@ -86,7 +86,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
  // a decoder bug related to alpha with color cache.
  // See: https://code.google.com/p/webp/issues/detail?id=239
  // Need to re-enable this later.
-  ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0);
+  ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
  WebPPictureFree(&picture);
  ok = ok && !bw->error_;
  if (!ok) {
--- a/Show More
+++ b/Show More