Compare commits

..

2 Commits

Author SHA1 Message Date
3b44c5eb71 libwebp.jar: build w/Java 1.6 for Android compat
broken since:
 a5c297c swig/java: reduce wrapper function code duplication

this was a part of v0.3.1, but not v0.3.0.

Change-Id: I001d4bd0a7a1aa1b2d267bc63bc1d8226bff00c1
(cherry picked from commit de899516c7)
2013-10-16 19:17:16 +02:00
c28544a420 fix memleak in WebPIDelete()
happens when decoding is partial (past Partition0), without error and
interrupted by calling WebPIDelete()

WebPIDelete() needs to call VP8ExitCritical() to free in-flight resources

Change-Id: Id4faef1b92f7edd8c17d642c58860e70dd570506
(cherry picked from commit 40ae3520b1)
2013-10-16 19:16:53 +02:00
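For context, here is a minimal sketch (not part of the change) of the call pattern the fix above is about: feeding a partial bitstream to the incremental decoder and then abandoning it with WebPIDelete(). Only the public incremental API declared in src/webp/decode.h is assumed; the helper name and the RGBA output mode are illustrative.

#include <stddef.h>
#include <stdint.h>
#include "webp/decode.h"

static void decode_partially(const uint8_t* data, size_t available) {
  WebPDecBuffer output;
  WebPInitDecBuffer(&output);
  output.colorspace = MODE_RGBA;

  WebPIDecoder* const idec = WebPINewDecoder(&output);
  if (idec == NULL) return;

  /* Append whatever bytes are available. VP8_STATUS_SUSPENDED means
   * decoding stopped without error and is waiting for more data. */
  (void)WebPIAppend(idec, data, available);

  /* Abandon decoding here: this is the path that could leak in-flight
   * resources (past partition 0) until VP8ExitCritical() was added. */
  WebPIDelete(idec);
  WebPFreeDecBuffer(&output);
}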
208 changed files with 12537 additions and 41200 deletions

.gitattributes (1 line changed)

@ -2,4 +2,3 @@
.gitignore export-ignore
.mailmap export-ignore
*.pdf -text -diff
*.ppm -text -diff

.gitignore (5 lines changed)

@ -1,6 +1,5 @@
*.l[ao]
*.[ao]
*.pc
.deps
.libs
/aclocal.m4
@ -15,16 +14,12 @@
/libtool
/ltmain.sh
/missing
/mkinstalldirs
/stamp-h1
Makefile
Makefile.in
examples/anim_diff
examples/[cdv]webp
examples/gif2webp
examples/webpmux
src/webp/config.h*
src/webp/stamp-h1
/output
/doc/output
*.idb

.mailmap

@ -1,10 +1,6 @@
<johann.koenig@duck.com> <johannkoenig@google.com>
Mikołaj Zalewski <mikolajz@google.com>
Pascal Massimino <pascal.massimino@gmail.com>
<pascal.massimino@gmail.com> <skal@google.com>
Vikas Arora <vikasa@google.com>
<vikasa@google.com> <vikasa@gmail.com>
<vikasa@google.com> <vikaas.arora@gmail.com>
<slobodan.prijic@imgtec.com> <Slobodan.Prijic@imgtec.com>
<vrabaud@google.com> <vincent.rabaud@gmail.com>
Tamar Levy <tamar.levy@intel.com>

AUTHORS (15 lines changed)

@ -1,32 +1,17 @@
Contributors:
- Charles Munger (clm at google dot com)
- Christian Duvivier (cduvivier at google dot com)
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- James Zern (jzern at google dot com)
- Jan Engelhardt (jengelh at medozas dot de)
- Johann (johann dot koenig at duck dot com)
- Jovan Zelincevic (jovan dot zelincevic at imgtec dot com)
- Jyrki Alakuijala (jyrki at google dot com)
- Lode Vandevenne (lode at google dot com)
- Lou Quillio (louquillio at google dot com)
- Mans Rullgard (mans at mansr dot com)
- Martin Olsson (mnemo at minimum dot se)
- Mikołaj Zalewski (mikolajz at google dot com)
- Mislav Bradac (mislavm at google dot com)
- Noel Chromium (noel at chromium dot org)
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)
- Sam Clegg (sbc at chromium dot org)
- Scott Hancher (seh at google dot com)
- Scott LaVarnway (slavarnway at google dot com)
- Scott Talbot (s at chikachow dot org)
- Slobodan Prijic (slobodan dot prijic at imgtec dot com)
- Somnath Banerjee (somnath dot banerjee at gmail dot com)
- Sriraman Tallam (tmsriram at google dot com)
- Tamar Levy (tamar dot levy at intel dot com)
- Timothy Gu (timothygu99 at gmail dot com)
- Urvang Joshi (urvang at google dot com)
- Vikas Arora (vikasa at google dot com)
- Vincent Rabaud (vrabaud at google dot com)
- Yang Zhang (yang dot zhang at arm dot com)

Android.mk

@ -1,253 +1,75 @@
LOCAL_PATH := $(call my-dir)
LOCAL_PATH:= $(call my-dir)
WEBP_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD
ifeq ($(APP_OPTIM),release)
WEBP_CFLAGS += -finline-functions -ffast-math \
-ffunction-sections -fdata-sections
ifeq ($(findstring clang,$(NDK_TOOLCHAIN_VERSION)),)
WEBP_CFLAGS += -frename-registers -s
endif
endif
ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
# Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
# instructions to be generated for armv7a code. Instead target the neon code
# specifically.
NEON := c.neon
USE_CPUFEATURES := yes
else
NEON := c
endif
dec_srcs := \
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
src/dec/alpha.c \
src/dec/buffer.c \
src/dec/frame.c \
src/dec/idec.c \
src/dec/io.c \
src/dec/layer.c \
src/dec/quant.c \
src/dec/tree.c \
src/dec/vp8.c \
src/dec/vp8l.c \
src/dec/webp.c \
demux_srcs := \
src/demux/anim_decode.c \
src/demux/demux.c \
dsp_dec_srcs := \
src/dsp/alpha_processing.c \
src/dsp/alpha_processing_mips_dsp_r2.c \
src/dsp/alpha_processing_sse2.c \
src/dsp/alpha_processing_sse41.c \
src/dsp/argb.c \
src/dsp/argb_mips_dsp_r2.c \
src/dsp/argb_sse2.c \
src/dsp/cpu.c \
src/dsp/dec.c \
src/dsp/dec_clip_tables.c \
src/dsp/dec_mips32.c \
src/dsp/dec_mips_dsp_r2.c \
src/dsp/dec_neon.$(NEON) \
src/dsp/dec_sse2.c \
src/dsp/dec_sse41.c \
src/dsp/filters.c \
src/dsp/filters_mips_dsp_r2.c \
src/dsp/filters_sse2.c \
src/dsp/enc.c \
src/dsp/enc_sse2.c \
src/dsp/lossless.c \
src/dsp/lossless_mips_dsp_r2.c \
src/dsp/lossless_neon.$(NEON) \
src/dsp/lossless_sse2.c \
src/dsp/rescaler.c \
src/dsp/rescaler_mips32.c \
src/dsp/rescaler_mips_dsp_r2.c \
src/dsp/rescaler_neon.$(NEON) \
src/dsp/rescaler_sse2.c \
src/dsp/upsampling.c \
src/dsp/upsampling_mips_dsp_r2.c \
src/dsp/upsampling_neon.$(NEON) \
src/dsp/upsampling_sse2.c \
src/dsp/yuv.c \
src/dsp/yuv_mips32.c \
src/dsp/yuv_mips_dsp_r2.c \
src/dsp/yuv_sse2.c \
dsp_enc_srcs := \
src/dsp/cost.c \
src/dsp/cost_mips32.c \
src/dsp/cost_mips_dsp_r2.c \
src/dsp/cost_sse2.c \
src/dsp/enc.c \
src/dsp/enc_avx2.c \
src/dsp/enc_mips32.c \
src/dsp/enc_mips_dsp_r2.c \
src/dsp/enc_neon.$(NEON) \
src/dsp/enc_sse2.c \
src/dsp/enc_sse41.c \
src/dsp/lossless_enc.c \
src/dsp/lossless_enc_mips32.c \
src/dsp/lossless_enc_mips_dsp_r2.c \
src/dsp/lossless_enc_neon.$(NEON) \
src/dsp/lossless_enc_sse2.c \
src/dsp/lossless_enc_sse41.c \
enc_srcs := \
src/enc/alpha.c \
src/enc/analysis.c \
src/enc/backward_references.c \
src/enc/config.c \
src/enc/cost.c \
src/enc/delta_palettization.c \
src/enc/filter.c \
src/enc/frame.c \
src/enc/histogram.c \
src/enc/iterator.c \
src/enc/near_lossless.c \
src/enc/layer.c \
src/enc/picture.c \
src/enc/picture_csp.c \
src/enc/picture_psnr.c \
src/enc/picture_rescale.c \
src/enc/picture_tools.c \
src/enc/quant.c \
src/enc/syntax.c \
src/enc/token.c \
src/enc/tree.c \
src/enc/vp8l.c \
src/enc/webpenc.c \
mux_srcs := \
src/mux/anim_encode.c \
src/mux/muxedit.c \
src/mux/muxinternal.c \
src/mux/muxread.c \
utils_dec_srcs := \
src/utils/bit_reader.c \
src/utils/bit_writer.c \
src/utils/color_cache.c \
src/utils/filters.c \
src/utils/huffman.c \
src/utils/huffman_encode.c \
src/utils/quant_levels.c \
src/utils/quant_levels_dec.c \
src/utils/random.c \
src/utils/rescaler.c \
src/utils/thread.c \
src/utils/utils.c \
utils_enc_srcs := \
src/utils/bit_writer.c \
src/utils/huffman_encode.c \
src/utils/quant_levels.c \
LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD \
-DWEBP_USE_THREAD \
-finline-functions -frename-registers -ffast-math \
-s -fomit-frame-pointer -Isrc/webp
################################################################################
# libwebpdecoder
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(dec_srcs) \
$(dsp_dec_srcs) \
$(utils_dec_srcs) \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
ifeq ($(USE_CPUFEATURES),yes)
LOCAL_STATIC_LIBRARIES := cpufeatures
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
# Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
# instructions to be generated for armv7a code. Instead target the neon code
# specifically.
LOCAL_SRC_FILES += src/dsp/dec_neon.c.neon
LOCAL_SRC_FILES += src/dsp/upsampling_neon.c.neon
LOCAL_SRC_FILES += src/dsp/enc_neon.c.neon
endif
LOCAL_STATIC_LIBRARIES := cpufeatures
LOCAL_MODULE := webpdecoder_static
LOCAL_MODULE:= webp
include $(BUILD_STATIC_LIBRARY)
ifeq ($(ENABLE_SHARED),1)
include $(CLEAR_VARS)
LOCAL_WHOLE_STATIC_LIBRARIES := webpdecoder_static
LOCAL_MODULE := webpdecoder
include $(BUILD_SHARED_LIBRARY)
endif # ENABLE_SHARED=1
################################################################################
# libwebp
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(dsp_enc_srcs) \
$(enc_srcs) \
$(utils_enc_srcs) \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_WHOLE_STATIC_LIBRARIES := webpdecoder_static
LOCAL_MODULE := webp
ifeq ($(ENABLE_SHARED),1)
include $(BUILD_SHARED_LIBRARY)
else
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
# libwebpdemux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(demux_srcs)
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_MODULE := webpdemux
ifeq ($(ENABLE_SHARED),1)
LOCAL_SHARED_LIBRARIES := webp
include $(BUILD_SHARED_LIBRARY)
else
LOCAL_STATIC_LIBRARIES := webp
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
# libwebpmux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(mux_srcs)
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_MODULE := webpmux
ifeq ($(ENABLE_SHARED),1)
LOCAL_SHARED_LIBRARIES := webp
include $(BUILD_SHARED_LIBRARY)
else
LOCAL_STATIC_LIBRARIES := webp
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
include $(LOCAL_PATH)/examples/Android.mk
ifeq ($(USE_CPUFEATURES),yes)
$(call import-module,android/cpufeatures)
endif
$(call import-module,android/cpufeatures)

ChangeLog (1565 lines changed)

File diff suppressed because it is too large.

Makefile.vc

@ -11,8 +11,6 @@ LIBWEBPDEMUX_BASENAME = libwebpdemux
ARCH = x86
!ELSE IF ! [ cl 2>&1 | find "x64" > NUL ]
ARCH = x64
!ELSE IF ! [ cl 2>&1 | find "ARM" > NUL ]
ARCH = ARM
!ELSE
!ERROR Unable to auto-detect toolchain architecture! \
If cl.exe is in your PATH rerun nmake with ARCH=<arch>.
@ -29,8 +27,9 @@ PLATFORM_LDFLAGS = /SAFESEH
NOLOGO = /nologo
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
CCDEBUG = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c /GS
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
CFLAGS = $(CFLAGS) /DHAVE_WINCODEC_H /DWEBP_USE_THREAD
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
LDFLAGS = $(LDFLAGS) $(PLATFORM_LDFLAGS)
LNKDLL = link.exe /DLL $(NOLOGO)
@ -38,12 +37,6 @@ LNKEXE = link.exe $(NOLOGO)
LNKLIB = lib.exe $(NOLOGO)
MT = mt.exe $(NOLOGO)
!IF "$(ARCH)" == "ARM"
CFLAGS = $(CFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP /DWEBP_USE_THREAD
!ELSE
CFLAGS = $(CFLAGS) /DHAVE_WINCODEC_H /DWEBP_USE_THREAD
!ENDIF
CFGSET = FALSE
!IF "$(OBJDIR)" == ""
OUTDIR = ..\obj\
@ -51,21 +44,11 @@ OUTDIR = ..\obj\
OUTDIR = $(OBJDIR)
!ENDIF
!IF "$(HAVE_AVX2)" == "1"
CFLAGS = $(CFLAGS) /DWEBP_HAVE_AVX2
AVX2_FLAGS = /arch:AVX2
!ENDIF
##############################################################
# Runtime library configuration
!IF "$(RTLIBCFG)" == "static"
RTLIB = /MT
RTLIBD = /MTd
!ELSE IF "$(RTLIBCFG)" == "legacy"
RTLIBCFG = static
RTLIB = /MT
RTLIBD = /MTd
CFLAGS = $(CFLAGS) /GS- /arch:IA32
!ELSE
RTLIB = /MD
RTLIBD = /MDd
@ -118,7 +101,9 @@ LIBWEBP = $(DIRLIB)\$(LIBWEBP_BASENAME).lib
LIBWEBPMUX = $(DIRLIB)\$(LIBWEBPMUX_BASENAME).lib
LIBWEBPDEMUX = $(DIRLIB)\$(LIBWEBPDEMUX_BASENAME).lib
!ELSE IF "$(DLLBUILD)" == "TRUE"
DLLC = webp_dll.c
DLLINC = webp_dll.h
DLL_OBJS = $(DIROBJ)\$(DLLC:.c=.obj)
CC = $(CC) /I$(DIROBJ) /FI$(DLLINC) $(RTLIB) /DWEBP_DLL
LIBWEBPDECODER = $(DIRLIB)\$(LIBWEBPDECODER_BASENAME)_dll.lib
LIBWEBP = $(DIRLIB)\$(LIBWEBP_BASENAME)_dll.lib
@ -147,11 +132,8 @@ CFGSET = TRUE
!MESSAGE . features enabled.
!MESSAGE - (empty) - build libwebp-based targets for CFG
!MESSAGE - all - build (de)mux-based targets for CFG
!MESSAGE - gif2webp - requires libgif & >= VS2013
!MESSAGE - anim_diff - requires libgif & >= VS2013
!MESSAGE
!MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
!MESSAGE 'legacy' will produce a Windows 2000 compatible library.
!MESSAGE OBJDIR is the path where you like to build (obj, bins, etc.),
!MESSAGE defaults to ..\obj
@ -174,6 +156,7 @@ DEC_OBJS = \
$(DIROBJ)\dec\frame.obj \
$(DIROBJ)\dec\idec.obj \
$(DIROBJ)\dec\io.obj \
$(DIROBJ)\dec\layer.obj \
$(DIROBJ)\dec\quant.obj \
$(DIROBJ)\dec\tree.obj \
$(DIROBJ)\dec\vp8.obj \
@ -181,79 +164,31 @@ DEC_OBJS = \
$(DIROBJ)\dec\webp.obj \
DEMUX_OBJS = \
$(DIROBJ)\demux\anim_decode.obj \
$(DIROBJ)\demux\demux.obj \
DSP_DEC_OBJS = \
$(DIROBJ)\dsp\alpha_processing.obj \
$(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \
$(DIROBJ)\dsp\alpha_processing_sse2.obj \
$(DIROBJ)\dsp\alpha_processing_sse41.obj \
$(DIROBJ)\dsp\cpu.obj \
$(DIROBJ)\dsp\dec.obj \
$(DIROBJ)\dsp\dec_clip_tables.obj \
$(DIROBJ)\dsp\dec_mips32.obj \
$(DIROBJ)\dsp\dec_mips_dsp_r2.obj \
$(DIROBJ)\dsp\dec_neon.obj \
$(DIROBJ)\dsp\dec_sse2.obj \
$(DIROBJ)\dsp\dec_sse41.obj \
$(DIROBJ)\dsp\filters.obj \
$(DIROBJ)\dsp\filters_mips_dsp_r2.obj \
$(DIROBJ)\dsp\filters_sse2.obj \
$(DIROBJ)\dsp\lossless.obj \
$(DIROBJ)\dsp\lossless_mips_dsp_r2.obj \
$(DIROBJ)\dsp\lossless_neon.obj \
$(DIROBJ)\dsp\lossless_sse2.obj \
$(DIROBJ)\dsp\rescaler.obj \
$(DIROBJ)\dsp\rescaler_mips32.obj \
$(DIROBJ)\dsp\rescaler_mips_dsp_r2.obj \
$(DIROBJ)\dsp\rescaler_neon.obj \
$(DIROBJ)\dsp\rescaler_sse2.obj \
$(DIROBJ)\dsp\upsampling.obj \
$(DIROBJ)\dsp\upsampling_mips_dsp_r2.obj \
$(DIROBJ)\dsp\upsampling_neon.obj \
$(DIROBJ)\dsp\upsampling_sse2.obj \
$(DIROBJ)\dsp\yuv.obj \
$(DIROBJ)\dsp\yuv_mips32.obj \
$(DIROBJ)\dsp\yuv_mips_dsp_r2.obj \
$(DIROBJ)\dsp\yuv_sse2.obj \
DSP_ENC_OBJS = \
$(DIROBJ)\dsp\argb.obj \
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
$(DIROBJ)\dsp\argb_sse2.obj \
$(DIROBJ)\dsp\cost.obj \
$(DIROBJ)\dsp\cost_mips32.obj \
$(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
$(DIROBJ)\dsp\cost_sse2.obj \
$(DIROBJ)\dsp\enc.obj \
$(DIROBJ)\dsp\enc_avx2.obj \
$(DIROBJ)\dsp\enc_mips32.obj \
$(DIROBJ)\dsp\enc_mips_dsp_r2.obj \
$(DIROBJ)\dsp\enc_neon.obj \
$(DIROBJ)\dsp\enc_sse2.obj \
$(DIROBJ)\dsp\enc_sse41.obj \
$(DIROBJ)\dsp\lossless_enc.obj \
$(DIROBJ)\dsp\lossless_enc_mips32.obj \
$(DIROBJ)\dsp\lossless_enc_mips_dsp_r2.obj \
$(DIROBJ)\dsp\lossless_enc_neon.obj \
$(DIROBJ)\dsp\lossless_enc_sse2.obj \
$(DIROBJ)\dsp\lossless_enc_sse41.obj \
EX_ANIM_UTIL_OBJS = \
$(DIROBJ)\examples\anim_util.obj \
EX_FORMAT_DEC_OBJS = \
$(DIROBJ)\examples\jpegdec.obj \
$(DIROBJ)\examples\metadata.obj \
$(DIROBJ)\examples\pngdec.obj \
$(DIROBJ)\examples\tiffdec.obj \
$(DIROBJ)\examples\webpdec.obj \
$(DIROBJ)\examples\wicdec.obj \
EX_GIF_DEC_OBJS = \
$(DIROBJ)\examples\gifdec.obj \
EX_UTIL_OBJS = \
$(DIROBJ)\examples\example_util.obj \
@ -263,17 +198,12 @@ ENC_OBJS = \
$(DIROBJ)\enc\backward_references.obj \
$(DIROBJ)\enc\config.obj \
$(DIROBJ)\enc\cost.obj \
$(DIROBJ)\enc\delta_palettization.obj \
$(DIROBJ)\enc\filter.obj \
$(DIROBJ)\enc\frame.obj \
$(DIROBJ)\enc\histogram.obj \
$(DIROBJ)\enc\iterator.obj \
$(DIROBJ)\enc\near_lossless.obj \
$(DIROBJ)\enc\layer.obj \
$(DIROBJ)\enc\picture.obj \
$(DIROBJ)\enc\picture_csp.obj \
$(DIROBJ)\enc\picture_psnr.obj \
$(DIROBJ)\enc\picture_rescale.obj \
$(DIROBJ)\enc\picture_tools.obj \
$(DIROBJ)\enc\quant.obj \
$(DIROBJ)\enc\syntax.obj \
$(DIROBJ)\enc\token.obj \
@ -282,7 +212,6 @@ ENC_OBJS = \
$(DIROBJ)\enc\webpenc.obj \
MUX_OBJS = \
$(DIROBJ)\mux\anim_encode.obj \
$(DIROBJ)\mux\muxedit.obj \
$(DIROBJ)\mux\muxinternal.obj \
$(DIROBJ)\mux\muxread.obj \
@ -294,7 +223,6 @@ UTILS_DEC_OBJS = \
$(DIROBJ)\utils\huffman.obj \
$(DIROBJ)\utils\quant_levels_dec.obj \
$(DIROBJ)\utils\rescaler.obj \
$(DIROBJ)\utils\random.obj \
$(DIROBJ)\utils\thread.obj \
$(DIROBJ)\utils\utils.obj \
@ -310,34 +238,19 @@ LIBWEBPMUX_OBJS = $(MUX_OBJS) $(LIBWEBPMUX_OBJS)
LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS) $(LIBWEBPDEMUX_OBJS)
OUT_LIBS = $(LIBWEBPDECODER) $(LIBWEBP)
!IF "$(ARCH)" == "ARM"
ex: $(OUT_LIBS)
all: ex
!ELSE
OUT_EXAMPLES = $(DIRBIN)\cwebp.exe $(DIRBIN)\dwebp.exe
EXTRA_EXAMPLES = $(DIRBIN)\vwebp.exe $(DIRBIN)\webpmux.exe
ex: $(OUT_LIBS) $(OUT_EXAMPLES)
all: ex $(EXTRA_EXAMPLES)
# NB: gif2webp.exe and anim_diff.exe are excluded from 'all' as libgif requires
# C99 support which is only available from VS2013 onward.
gif2webp: $(DIRBIN)\gif2webp.exe
anim_diff: $(DIRBIN)\anim_diff.exe
$(DIRBIN)\anim_diff.exe: $(DIROBJ)\examples\anim_diff.obj $(EX_ANIM_UTIL_OBJS)
$(DIRBIN)\anim_diff.exe: $(EX_UTIL_OBJS)
$(DIRBIN)\anim_diff.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
$(DIRBIN)\cwebp.exe: $(DIROBJ)\examples\cwebp.obj $(EX_FORMAT_DEC_OBJS)
$(DIRBIN)\dwebp.exe: $(DIROBJ)\examples\dwebp.obj
$(DIRBIN)\gif2webp.exe: $(DIROBJ)\examples\gif2webp.obj $(EX_GIF_DEC_OBJS)
$(DIRBIN)\gif2webp.exe: $(EX_UTIL_OBJS) $(LIBWEBPMUX) $(LIBWEBP)
$(DIRBIN)\vwebp.exe: $(DIROBJ)\examples\vwebp.obj
$(DIRBIN)\vwebp.exe: $(EX_UTIL_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
$(DIRBIN)\webpmux.exe: $(DIROBJ)\examples\webpmux.obj $(LIBWEBPMUX)
$(DIRBIN)\webpmux.exe: $(EX_UTIL_OBJS) $(LIBWEBP)
$(OUT_EXAMPLES): $(EX_UTIL_OBJS) $(LIBWEBP)
$(EX_UTIL_OBJS) $(EX_FORMAT_DEC_OBJS): $(OUTPUT_DIRS)
!ENDIF # ARCH == ARM
experimental:
$(MAKE) /f Makefile.vc \
@ -353,7 +266,7 @@ $(LIBWEBP_OBJS) $(LIBWEBPMUX_OBJS) $(LIBWEBPDEMUX_OBJS): $(OUTPUT_DIRS)
!IF "$(DLLBUILD)" == "TRUE"
$(LIBWEBP_OBJS) $(LIBWEBPMUX_OBJS) $(LIBWEBPDEMUX_OBJS): \
$(DIROBJ)\$(DLLINC)
$(DIROBJ)\$(DLLINC) $(DIROBJ)\$(DLLC)
{$(DIROBJ)}.c{$(DIROBJ)}.obj:
$(CC) $(CFLAGS) /Fd$(LIBWEBP_PDBNAME) /Fo$@ $<
@ -366,7 +279,7 @@ $(LIBWEBPDECODER) $(LIBWEBP) $(LIBWEBPMUX) $(LIBWEBPDEMUX):
-xcopy $(DIROBJ)\*.pdb $(DIRLIB) /y
clean::
@-erase /s $(DIROBJ)\$(DLLINC) 2> NUL
@-erase /s $(DIROBJ)\$(DLLC) $(DIROBJ)\$(DLLINC) 2> NUL
!ELSE
$(LIBWEBPDECODER) $(LIBWEBP) $(LIBWEBPMUX) $(LIBWEBPDEMUX):
$(LNKLIB) /out:$@ $**
@ -383,27 +296,19 @@ $(DIROBJ)\$(DLLINC):
@echo #define WEBP_EXTERN(type) __declspec(dllexport) type >> $@
@echo #endif /* WEBP_DLL_H_ */ >> $@
# expose a WebPFree() function for use in managed code
$(DIROBJ)\$(DLLC): $(DIROBJ)\$(DLLINC)
@echo #include ^<stdlib.h^> > $@
@echo #include "webp_dll.h" >> $@
@echo // This function should be used in place of free() for memory >> $@
@echo // returned by the WebP API. >> $@
@echo WEBP_EXTERN(void) WebPFree(void* ptr) { >> $@
@echo free(ptr); >> $@
@echo } >> $@
.SUFFIXES: .c .obj .res .exe
# File-specific flag builds. Note batch rules take precedence over wildcards,
# so for now name each file individually.
$(DIROBJ)\dsp\enc_avx2.obj: src\dsp\enc_avx2.c
$(CC) $(CFLAGS) $(AVX2_FLAGS) /Fd$(LIBWEBP_PDBNAME) /Fo$(DIROBJ)\dsp\ \
src\dsp\$(@B).c
$(DIROBJ)\examples\anim_diff.obj: examples\anim_diff.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
$(DIROBJ)\examples\anim_util.obj: examples\anim_util.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
$(DIROBJ)\examples\gif2webp.obj: examples\gif2webp.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
$(DIROBJ)\examples\gifdec.obj: examples\gifdec.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
# Batch rules
{examples}.c{$(DIROBJ)\examples}.obj::
$(CC) $(CFLAGS) /Fd$(DIROBJ)\examples\ /Fo$(DIROBJ)\examples\ $<
$(CC) $(CFLAGS) /Fd$(DIROBJ)\examples\ /Fo$(DIROBJ)\examples\ $<
{src\dec}.c{$(DIROBJ)\dec}.obj::
$(CC) $(CFLAGS) /Fd$(LIBWEBP_PDBNAME) /Fo$(DIROBJ)\dec\ $<
{src\demux}.c{$(DIROBJ)\demux}.obj::
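The webp_dll.c generated by the Makefile.vc rule above exists so that memory handed out by the DLL can be released by the same C runtime that allocated it. A hedged sketch of the intended caller-side pattern, assuming the standard one-shot decoder (WebPDecodeRGBA) and the exported WebPFree(); the helper name is illustrative.

#include <stddef.h>
#include <stdint.h>
#include "webp/decode.h"

/* WebPFree() is declared in webp/encode.h in newer releases, or exported
 * via the generated webp_dll.h for DLL builds; declared here for clarity. */
extern void WebPFree(void* ptr);

static void decode_and_release(const uint8_t* data, size_t size) {
  int width = 0, height = 0;
  uint8_t* const rgba = WebPDecodeRGBA(data, size, &width, &height);
  if (rgba == NULL) return;
  /* ... use the width x height RGBA pixels ... */
  WebPFree(rgba);  /* not free(rgba): the buffer was allocated by the DLL */
}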

NEWS (74 lines changed)

@ -1,74 +1,3 @@
- 12/17/2015: version 0.5.0
* miscellaneous bug & build fixes (issues #234, #258, #274, #275, #278)
* encoder & decoder speed-ups on x86/ARM/MIPS for lossy & lossless
- note! YUV->RGB conversion was sped-up, but the results will be slightly
different from previous releases
* various lossless encoder improvements
* gif2webp improvements, -min_size option added
* tools fully support input from stdin and output to stdout (issue #168)
* New WebPAnimEncoder API for creating animations
* New WebPAnimDecoder API for decoding animations
* other API changes:
- libwebp:
WebPPictureSmartARGBToYUVA() (-pre 4 in cwebp)
WebPConfig::exact (-exact in cwebp; -alpha_cleanup is now the default)
WebPConfig::near_lossless (-near_lossless in cwebp)
WebPFree() (free'ing webp allocated memory in other languages)
WebPConfigLosslessPreset()
WebPMemoryWriterClear()
- libwebpdemux: removed experimental fragment related fields and functions
- libwebpmux: WebPMuxSetCanvasSize()
* new libwebpextras library with some uncommon import functions:
WebPImportGray/WebPImportRGB565/WebPImportRGB4444
- 10/15/15: version 0.4.4
This is a binary compatible release.
* rescaling out-of-bounds read fix (issue #254)
* various build fixes and improvements (issues #253, #259, #262, #267, #268)
* container documentation update
* gif2webp transparency fix (issue #245)
- 3/3/15: version 0.4.3
This is a binary compatible release.
* Android / gcc / iOS / MSVS build fixes and improvements
* lossless decode fix (issue #239 -- since 0.4.0)
* documentation / vwebp updates for animation
* multi-threading fix (issue #234)
- 10/13/14: version 0.4.2
This is a binary compatible release.
* Android / gcc build fixes
* (Windows) fix reading from stdin and writing to stdout
* gif2webp: miscellaneous fixes
* fix 'alpha-leak' with lossy compression (issue #220)
* the lossless bitstream spec has been amended to reflect the current code
- 7/24/14: version 0.4.1
This is a binary compatible release.
* AArch64 (arm64) & MIPS support/optimizations
* NEON assembly additions:
- ~25% faster lossy decode / encode (-m 4)
- ~10% faster lossless decode
- ~5-10% faster lossless encode (-m 3/4)
* dwebp/vwebp can read from stdin
* cwebp/gif2webp can write to stdout
* cwebp can read webp files; useful if storing sources as webp lossless
- 12/19/13: version 0.4.0
* improved gif2webp tool
* numerous fixes, compression improvement and speed-up
* dither option added to decoder (dwebp -dither 50 ...)
* improved multi-threaded modes (-mt option)
* improved filtering strength determination
* New function: WebPMuxGetCanvasSize
* BMP and TIFF format output added to 'dwebp'
* Significant memory reduction for decoding lossy images with alpha.
* Intertwined decoding of RGB and alpha for a shorter
time-to-first-decoded-pixel.
* WebPIterator has a new member 'has_alpha' denoting whether the frame
contains transparency.
* Container spec amended with new 'blending method' for animation.
- 6/13/13: version 0.3.1
This is a binary compatible release.
* Add incremental decoding support for images containing ALPH and ICCP chunks.
@ -119,8 +48,7 @@
- 9/19/11: version 0.1.3
* Advanced decoding APIs.
* On-the-fly cropping and rescaling of images.
* SSE2 instructions for decoding performance optimizations on x86 based
platforms.
* SSE2 instructions for decoding performance optimizations on x86 based platforms.
* Support Multi-threaded decoding.
* 40% improvement in Decoding performance.
* Add support for RGB565, RGBA4444 & ARGB image colorspace.

PATENTS (39 lines changed)

@ -1,23 +1,22 @@
Additional IP Rights Grant (Patents)
------------------------------------
"These implementations" means the copyrightable works that implement the WebM
codecs distributed by Google as part of the WebM Project.
"This implementation" means the copyrightable works distributed by
Google as part of the WebM Project.
Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and otherwise
run, modify and propagate the contents of these implementations of WebM, where
such license applies only to those patent claims, both currently owned by
Google and acquired in the future, licensable by Google that are necessarily
infringed by these implementations of WebM. This grant does not include claims
that would be infringed only as a consequence of further modification of these
implementations. If you or your agent or exclusive licensee institute or order
or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.
Google hereby grants to you a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer, and otherwise run, modify and propagate the contents of this
implementation of VP8, where such license applies only to those patent
claims, both currently owned by Google and acquired in the future,
licensable by Google that are necessarily infringed by this
implementation of VP8. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of VP8 or any code incorporated within this
implementation of VP8 constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of VP8
shall terminate as of the date such litigation is filed.

README (207 lines changed)

@ -4,7 +4,7 @@
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v0.5.0
\____/____/\_____/_____/____/v0.3.1
Description:
============
@ -15,8 +15,7 @@ as well as the command line tools 'cwebp' and 'dwebp'.
See http://developers.google.com/speed/webp
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
Latest sources are available from http://www.webmproject.org/code/
It is released under the same license as the WebM project.
See http://www.webmproject.org/license/software/ or the
@ -54,12 +53,6 @@ Please refer to makefile.unix for additional details and customizations.
Using autoconf tools:
---------------------
Prerequisites:
A compiler (e.g., gcc), make, autoconf, automake, libtool.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install gcc make autoconf automake libtool
When building from git sources, you will need to run autogen.sh to generate the
configure script.
@ -87,7 +80,7 @@ more options.
SWIG bindings:
--------------
To generate language bindings from swig/libwebp.swig at least swig-1.3
To generate language bindings from swig/libwebp.i at least swig-1.3
(http://www.swig.org) is required.
Currently the following functions are mapped:
@ -122,7 +115,7 @@ DLL that can be loaded via System.loadLibrary("webp_jni").
Python bindings:
To build the swig-generated Python extension code at least Python 2.6 is
required. Python < 2.6 may build with some minor changes to libwebp.swig or the
required. Python < 2.6 may build with some minor changes to libwebp.i or the
generated code, but is untested.
Encoding tool:
@ -147,32 +140,28 @@ A longer list of options is available using the -longhelp command line flag:
Usage:
cwebp [-preset <...>] [options] in_file [-o out_file]
If input size (-s) for an image is not specified, it is
assumed to be a PNG, JPEG, TIFF or WebP file.
Options:
If input size (-s) for an image is not specified, it is assumed to be a PNG,
JPEG or TIFF file.
options:
-h / -help ............ short help
-H / -longhelp ........ long help
-q <float> ............. quality factor (0:small..100:big)
-alpha_q <int> ......... transparency-compression quality (0..100)
-preset <string> ....... preset setting, one of:
-alpha_q <int> ......... Transparency-compression quality (0..100).
-preset <string> ....... Preset setting, one of:
default, photo, picture,
drawing, icon, text
-preset must come first, as it overwrites other parameters
-z <int> ............... activates lossless preset with given
level in [0:fast, ..., 9:slowest]
-preset must come first, as it overwrites other parameters.
-m <int> ............... compression method (0=fast, 6=slowest)
-segments <int> ........ number of segments to use (1..4)
-size <int> ............ target size (in bytes)
-psnr <float> .......... target PSNR (in dB. typically: 42)
-size <int> ............ Target size (in bytes)
-psnr <float> .......... Target PSNR (in dB. typically: 42)
-s <int> <int> ......... input size (width x height) for YUV
-sns <int> ............. spatial noise shaping (0:off, 100:max)
-s <int> <int> ......... Input size (width x height) for YUV
-sns <int> ............. Spatial Noise Shaping (0:off, 100:max)
-f <int> ............... filter strength (0=off..100)
-sharpness <int> ....... filter sharpness (0:most .. 7:least sharp)
-strong ................ use strong filter instead of simple (default)
-nostrong .............. use simple filter instead of strong
-strong ................ use strong filter instead of simple (default).
-nostrong .............. use simple filter instead of strong.
-partition_limit <int> . limit quality to fit the 512k limit on
the first partition (0=no degradation ... 100=full)
-pass <int> ............ analysis pass number (1..10)
@ -180,42 +169,37 @@ Options:
-resize <w> <h> ........ resize picture (after any cropping)
-mt .................... use multi-threading if available
-low_memory ............ reduce memory usage (slower encoding)
-map <int> ............. print map of extra info
-print_psnr ............ prints averaged PSNR distortion
-print_ssim ............ prints averaged SSIM distortion
-print_lsim ............ prints local-similarity distortion
-d <file.pgm> .......... dump the compressed output (PGM file)
-alpha_method <int> .... transparency-compression method (0..1)
-alpha_filter <string> . predictive filtering for alpha plane,
one of: none, fast (default) or best
-exact ................. preserve RGB values in transparent area
-blend_alpha <hex> ..... blend colors against background color
expressed as RGB values written in
hexadecimal, e.g. 0xc0e0d0 for red=0xc0
green=0xe0 and blue=0xd0
-noalpha ............... discard any transparency information
-lossless .............. encode image losslessly
-near_lossless <int> ... use near-lossless image
preprocessing (0..100=off)
-hint <string> ......... specify image characteristics hint,
one of: photo, picture or graph
-map <int> ............. print map of extra info.
-print_psnr ............ prints averaged PSNR distortion.
-print_ssim ............ prints averaged SSIM distortion.
-print_lsim ............ prints local-similarity distortion.
-d <file.pgm> .......... dump the compressed output (PGM file).
-alpha_method <int> .... Transparency-compression method (0..1)
-alpha_filter <string> . predictive filtering for alpha plane.
One of: none, fast (default) or best.
-alpha_cleanup ......... Clean RGB values in transparent area.
-noalpha ............... discard any transparency information.
-lossless .............. Encode image losslessly.
-hint <string> ......... Specify image characteristics hint.
One of: photo, picture or graph
-metadata <string> ..... comma separated list of metadata to
copy from the input to the output if present.
Valid values: all, none (default), exif, icc, xmp
-short ................. condense printed message
-quiet ................. don't print anything
-version ............... print version number and exit
-noasm ................. disable all assembly optimizations
-quiet ................. don't print anything.
-version ............... print version number and exit.
-noasm ................. disable all assembly optimizations.
-v ..................... verbose, e.g. print encoding/decoding times
-progress .............. report encoding progress
Experimental Options:
-jpeg_like ............. roughly match expected JPEG size
-af .................... auto-adjust filter strength
-jpeg_like ............. Roughly match expected JPEG size.
-af .................... auto-adjust filter strength.
-pre <int> ............. pre-processing filter
The main options you might want to try in order to further tune the
visual quality are:
-preset
@ -267,29 +251,21 @@ Decodes the WebP image file to PNG format [Default]
Use following options to convert into alternate image formats:
-pam ......... save the raw RGBA samples as a color PAM
-ppm ......... save the raw RGB samples as a color PPM
-bmp ......... save as uncompressed BMP format
-tiff ........ save as uncompressed TIFF format
-pgm ......... save the raw YUV samples as a grayscale PGM
file with IMC4 layout
-yuv ......... save the raw YUV samples in flat layout
file with IMC4 layout.
-yuv ......... save the raw YUV samples in flat layout.
Other options are:
-version .... print version number and exit
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-nodither .... disable dithering
-dither <d> .. dithering strength (in 0..100)
-alpha_dither use alpha-plane dithering if needed
-version .... print version number and exit.
-nofancy ..... don't use the fancy YUV420 upscaler.
-nofilter .... disable in-loop filtering.
-mt .......... use multi-threading
-crop <x> <y> <w> <h> ... crop output with the given rectangle
-resize <w> <h> ......... scale the output (*after* any cropping)
-flip ........ flip the output vertically
-alpha ....... only save the alpha plane
-incremental . use incremental decoding (useful for tests)
-h ....... this help message
-scale <w> <h> .......... scale the output (*after* any cropping)
-alpha ....... only save the alpha plane.
-h ....... this help message.
-v ....... verbose (e.g. print encoding/decoding times)
-quiet ....... quiet mode, don't print anything
-noasm ....... disable all assembly optimizations
-noasm ....... disable all assembly optimizations.
Visualization tool:
===================
@ -303,20 +279,18 @@ Usage: vwebp in_file [options]
Decodes the WebP image file and visualize it using OpenGL
Options are:
-version .... print version number and exit
-noicc ....... don't use the icc profile if present
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-dither <int> dithering strength (0..100), default=50
-noalphadither disable alpha plane dithering
-mt .......... use multi-threading
-info ........ print info
-h ....... this help message
-version .... print version number and exit.
-noicc ....... don't use the icc profile if present.
-nofancy ..... don't use the fancy YUV420 upscaler.
-nofilter .... disable in-loop filtering.
-mt .......... use multi-threading.
-info ........ print info.
-h ....... this help message.
Keyboard shortcuts:
'c' ................ toggle use of color profile
'i' ................ overlay file information
'q' / 'Q' / ESC .... quit
'c' ................ toggle use of color profile.
'i' ................ overlay file information.
'q' / 'Q' / ESC .... quit.
Building:
---------
@ -344,70 +318,6 @@ $ make -f makefile.unix examples/vwebp
> nmake /f Makefile.vc CFG=release-static \
../obj/x64/release-static/bin/vwebp.exe
Animated GIF conversion:
========================
Animated GIF files can be converted to WebP files with animation using the
gif2webp utility available under examples/. The files can then be viewed using
vwebp.
Usage:
gif2webp [options] gif_file -o webp_file
Options:
-h / -help ............ this help
-lossy ................. encode image using lossy compression
-mixed ................. for each frame in the image, pick lossy
or lossless compression heuristically
-q <float> ............. quality factor (0:small..100:big)
-m <int> ............... compression method (0=fast, 6=slowest)
-min_size .............. minimize output size (default:off)
lossless compression by default; can be
combined with -q, -m, -lossy or -mixed
options
-kmin <int> ............ min distance between key frames
-kmax <int> ............ max distance between key frames
-f <int> ............... filter strength (0=off..100)
-metadata <string> ..... comma separated list of metadata to
copy from the input to the output if present
Valid values: all, none, icc, xmp (default)
-mt .................... use multi-threading if available
-version ............... print version number and exit
-v ..................... verbose
-quiet ................. don't print anything
Building:
---------
With the libgif development files installed, gif2webp can be built using
makefile.unix:
$ make -f makefile.unix examples/gif2webp
or using autoconf:
$ ./configure --enable-everything
$ make
Comparison of animated images:
==============================
Test utility anim_diff under examples/ can be used to compare two animated
images (each can be GIF or WebP).
Usage: anim_diff <image1> <image2> [options]
Options:
-dump_frames <folder> dump decoded frames in PAM format
-min_psnr <float> ... minimum per-frame PSNR
-raw_comparison ..... if this flag is not used, RGB is
premultiplied before comparison
Building:
---------
With the libgif development files and a C++ compiler installed, anim_diff can
be built using makefile.unix:
$ make -f makefile.unix examples/anim_diff
or using autoconf:
$ ./configure --enable-everything
$ make
Encoding API:
=============
@ -485,20 +395,15 @@ The encoding flow looks like:
// Set up a byte-output write method. WebPMemoryWriter, for instance.
WebPMemoryWriter wrt;
WebPMemoryWriterInit(&wrt); // initialize 'wrt'
pic.writer = MyFileWriter;
pic.custom_ptr = my_opaque_structure_to_make_MyFileWriter_work;
// initialize 'wrt' here...
// Compress!
int ok = WebPEncode(&config, &pic); // ok = 0 => error occurred!
WebPPictureFree(&pic); // must be called independently of the 'ok' result.
// output data should have been handled by the writer at that point.
// -> compressed data is the memory buffer described by wrt.mem / wrt.size
// deallocate the memory used by compressed data
WebPMemoryWriterClear(&wrt);
-------------------------------------- END PSEUDO EXAMPLE
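For reference, a compilable sketch of the encoding flow shown in this hunk, using the in-memory writer variant (WebPMemoryWriter; note WebPMemoryWriterClear() exists only on the newer side of this comparison). The helper name and the chosen quality are illustrative; only the public encode.h API is assumed.

#include <stddef.h>
#include <stdint.h>
#include "webp/encode.h"

/* Encode an RGBA buffer to WebP in memory. Returns 1 on success and hands
 * ownership of the output buffer to the caller. */
static int encode_rgba(const uint8_t* rgba, int width, int height,
                       int stride, uint8_t** out, size_t* out_size) {
  WebPConfig config;
  WebPPicture pic;
  WebPMemoryWriter wrt;
  int ok;

  if (!WebPConfigInit(&config) || !WebPPictureInit(&pic)) return 0;
  config.quality = 75.0f;            /* illustrative setting */

  pic.width = width;
  pic.height = height;
  if (!WebPPictureImportRGBA(&pic, rgba, stride)) {
    WebPPictureFree(&pic);
    return 0;
  }

  WebPMemoryWriterInit(&wrt);        /* initialize 'wrt' */
  pic.writer = WebPMemoryWrite;
  pic.custom_ptr = &wrt;

  ok = WebPEncode(&config, &pic);    /* ok = 0 => error occurred */
  WebPPictureFree(&pic);             /* call independently of 'ok' */

  if (!ok) {
    WebPMemoryWriterClear(&wrt);     /* deallocate on failure */
    return 0;
  }
  *out = wrt.mem;                    /* compressed data: wrt.mem / wrt.size */
  *out_size = wrt.size;
  return 1;
}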
@ -638,7 +543,7 @@ Bugs:
=====
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

README.mux

@ -1,7 +1,7 @@
 __ __ ____ ____ ____ __ __ _ __ __
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
\ / __/ _ \ __/ / / (_/ /__
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.3.0
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.1.1
Description:
@ -33,35 +33,34 @@ Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT
webpmux -version
GET_OPTIONS:
Extract relevant data:
icc get ICC profile
exif get EXIF metadata
xmp get XMP metadata
frame n get nth frame
Extract relevant data.
icc Get ICC profile.
exif Get EXIF metadata.
xmp Get XMP metadata.
frame n Get nth frame.
SET_OPTIONS:
Set color profile/metadata:
icc file.icc set ICC profile
exif file.exif set EXIF metadata
xmp file.xmp set XMP metadata
Set color profile/metadata.
icc file.icc Set ICC profile.
exif file.exif Set EXIF metadata.
xmp file.xmp Set XMP metadata.
where: 'file.icc' contains the ICC profile to be set,
'file.exif' contains the EXIF metadata to be set
'file.xmp' contains the XMP metadata to be set
STRIP_OPTIONS:
Strip color profile/metadata:
icc strip ICC profile
exif strip EXIF metadata
xmp strip XMP metadata
Strip color profile/metadata.
icc Strip ICC profile.
exif Strip EXIF metadata.
xmp Strip XMP metadata.
FRAME_OPTIONS(i):
Create animation:
file_i +di+[xi+yi[+mi[bi]]]
Create animation.
file_i +di+xi+yi+mi
where: 'file_i' is the i'th animation frame (WebP format),
'di' is the pause duration before next frame,
'xi','yi' specify the image offset for this frame,
'mi' is the dispose method for this frame (0 or 1),
'bi' is the blending method for this frame (+b or -b)
'di' is the pause duration before next frame.
'xi','yi' specify the image offset for this frame.
'mi' is the dispose method for this frame (0 or 1).
LOOP_COUNT:
Number of times to repeat the animation.
@ -72,7 +71,7 @@ BACKGROUND_COLOR:
A,R,G,B
where: 'A', 'R', 'G' and 'B' are integers in the range 0 to 255 specifying
the Alpha, Red, Green and Blue component values respectively
[Default: 255,255,255,255]
[Default: 255,255,255,255].
INPUT & OUTPUT are in WebP format.
@ -133,7 +132,7 @@ WebP files. This API currently supports reading of XMP/EXIF metadata, ICC
profile and animated images. Other features may be added in subsequent
releases.
Code example: Demuxing WebP data to extract all the frames, ICC profile
Code Example: Demuxing WebP data to extract all the frames, ICC profile
and EXIF/XMP metadata.
WebPDemuxer* demux = WebPDemux(&webp_data);
@ -170,36 +169,12 @@ and EXIF/XMP metadata.
For a detailed Demux API reference, please refer to the header file
(src/webp/demux.h).
AnimEncoder API:
================
The AnimEncoder API can be used to create animated WebP images.
Code example:
WebPAnimEncoderOptions enc_options;
WebPAnimEncoderOptionsInit(&enc_options);
// ... (Tune 'enc_options' as needed).
WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
while(<there are more frames>) {
WebPConfig config;
WebPConfigInit(&config);
// ... (Tune 'config' as needed).
WebPAnimEncoderAdd(enc, frame, duration, &config);
}
WebPAnimEncoderAssemble(enc, webp_data);
WebPAnimEncoderDelete(enc);
// ... (Write the 'webp_data' to a file, or re-mux it further).
For a detailed AnimEncoder API reference, please refer to the header file
(src/webp/mux.h).
Bugs:
=====
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

configure.ac

@ -1,8 +1,7 @@
AC_INIT([libwebp], [0.5.0],
[https://bugs.chromium.org/p/webp],,
AC_INIT([libwebp], [0.3.1],
[http://code.google.com/p/webp/issues],,
[http://developers.google.com/speed/webp])
AC_CANONICAL_HOST
AC_PREREQ([2.60])
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE([-Wall foreign subdir-objects])
dnl === automake >= 1.12 requires this for 'unusual archivers' support.
@ -15,9 +14,6 @@ AM_PROG_CC_C_O
dnl === Enable less verbose output when building.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
dnl == test endianness
AC_C_BIGENDIAN
dnl === SET_IF_UNSET(shell_var, value)
dnl === Set the shell variable 'shell_var' to 'value' if it is unset.
AC_DEFUN([SET_IF_UNSET], [test "${$1+set}" = "set" || $1=$2])
@ -28,156 +24,40 @@ AC_ARG_ENABLE([everything],
disabled with --disable-target]),
[SET_IF_UNSET([enable_libwebpdecoder], [$enableval])
SET_IF_UNSET([enable_libwebpdemux], [$enableval])
SET_IF_UNSET([enable_libwebpextras], [$enableval])
SET_IF_UNSET([enable_libwebpmux], [$enableval])])
dnl === If --enable-asserts is not defined, define NDEBUG
AC_MSG_CHECKING(whether asserts are enabled)
AC_ARG_ENABLE([asserts],
AS_HELP_STRING([--enable-asserts],
[Enable assert checks]))
if test "x${enable_asserts-no}" = "xno"; then
AM_CPPFLAGS="${AM_CPPFLAGS} -DNDEBUG"
fi
AC_MSG_RESULT(${enable_asserts-no})
AC_SUBST([AM_CPPFLAGS])
AC_ARG_WITH([pkgconfigdir], AS_HELP_STRING([--with-pkgconfigdir=DIR],
[Path to the pkgconfig directory @<:@LIBDIR/pkgconfig@:>@]),
[pkgconfigdir="$withval"], [pkgconfigdir='${libdir}/pkgconfig'])
AC_SUBST([pkgconfigdir])
dnl === TEST_AND_ADD_CFLAGS(var, flag)
dnl === Checks whether $CC supports 'flag' and adds it to 'var'
dnl === on success.
dnl === TEST_AND_ADD_CFLAGS(flag)
dnl === Checks whether $CC supports 'flag' and adds it to AM_CFLAGS on success.
AC_DEFUN([TEST_AND_ADD_CFLAGS],
[SAVED_CFLAGS="$CFLAGS"
CFLAGS="-Werror $2"
AC_MSG_CHECKING([whether $CC supports $2])
CFLAGS="-Werror $1"
AC_MSG_CHECKING([whether $CC supports $1])
dnl Note AC_LANG_PROGRAM([]) uses an old-style main definition.
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) { return 0; }])],
[AC_MSG_RESULT([yes])]
dnl Simply append the variable avoiding a
dnl compatibility ifdef for AS_VAR_APPEND as this
dnl variable shouldn't grow all that large.
[$1="${$1} $2"],
[AM_CFLAGS="$AM_CFLAGS $1"],
[AC_MSG_RESULT([no])])
CFLAGS="$SAVED_CFLAGS"])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-fvisibility=hidden])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wall])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wdeclaration-after-statement])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wextra])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-nonliteral])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-security])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-declarations])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-prototypes])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshadow])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshorten-64-to-32])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunreachable-code])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused-but-set-variable])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wvla])
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62040
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61622
AS_IF([test "$GCC" = "yes" ], [
gcc_version=`$CC -dumpversion`
gcc_wht_bug=""
case "$host_cpu" in
aarch64|arm64)
case "$gcc_version" in
4.9|4.9.0|4.9.1) gcc_wht_bug=yes ;;
esac
esac
AS_IF([test "$gcc_wht_bug" = "yes"], [
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-frename-registers])])])
# Use -flax-vector-conversions, if available, when building intrinsics with
# older versions of gcc. The flag appeared in 4.3.x, but if backported, and
# -fno-lax-vector-conversions is set, errors may occur with the intrinsics
# files along with the older system includes, e.g., emmintrin.h.
# Originally observed with cc (GCC) 4.2.1 20070831 patched [FreeBSD] (9.3).
# https://bugs.chromium.org/p/webp/issues/detail?id=274
AS_IF([test "$GCC" = "yes" ], [
case "$host_cpu" in
amd64|i?86|x86_64)
AC_COMPILE_IFELSE(
dnl only check for -flax-vector-conversions with older gcc, skip
dnl clang as it reports itself as 4.2.1, but the flag isn't needed.
[AC_LANG_SOURCE([#if !defined(__clang__) && defined(__GNUC__) && \
((__GNUC__ << 8) | __GNUC_MINOR__) < 0x403
#error old gcc
#endif
int main(void) { return 0; }
])],,
[TEST_AND_ADD_CFLAGS([INTRINSICS_CFLAGS],
[-flax-vector-conversions])])
;;
esac])
TEST_AND_ADD_CFLAGS([-Wall])
TEST_AND_ADD_CFLAGS([-Wdeclaration-after-statement])
TEST_AND_ADD_CFLAGS([-Wextra])
TEST_AND_ADD_CFLAGS([-Wmissing-declarations])
TEST_AND_ADD_CFLAGS([-Wmissing-prototypes])
TEST_AND_ADD_CFLAGS([-Wold-style-definition])
TEST_AND_ADD_CFLAGS([-Wshadow])
TEST_AND_ADD_CFLAGS([-Wunused-but-set-variable])
TEST_AND_ADD_CFLAGS([-Wunused])
TEST_AND_ADD_CFLAGS([-Wvla])
AC_SUBST([AM_CFLAGS])
dnl === Check for machine specific flags
AC_ARG_ENABLE([avx2],
AS_HELP_STRING([--disable-avx2],
[Disable detection of AVX2 support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_avx2" != "xno" -a "x$enable_sse4_1" != "xno" \
-a "x$enable_sse2" != "xno"], [
AVX2_CFLAGS="$INTRINSICS_CFLAGS $AVX2_FLAGS"
TEST_AND_ADD_CFLAGS([AVX2_FLAGS], [-mavx2])
AS_IF([test -n "$AVX2_FLAGS"], [
SAVED_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $AVX2_FLAGS"
AC_CHECK_HEADER([immintrin.h],
[AC_DEFINE(WEBP_HAVE_AVX2, [1],
[Set to 1 if AVX2 is supported])],
[AVX2_FLAGS=""],
dnl it's illegal to directly include avx2intrin.h, but it's
dnl included conditionally in immintrin.h, tricky!
[#ifndef __AVX2__
#error avx2 is not enabled
#endif
])
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([AVX2_FLAGS])])
AC_ARG_ENABLE([sse4.1],
AS_HELP_STRING([--disable-sse4.1],
[Disable detection of SSE4.1 support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_sse4_1" != "xno" -a "x$enable_sse2" != "xno"], [
SSE41_FLAGS="$INTRINSICS_CFLAGS $SSE41_FLAGS"
TEST_AND_ADD_CFLAGS([SSE41_FLAGS], [-msse4.1])
AS_IF([test -n "$SSE41_FLAGS"], [
SAVED_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $SSE41_FLAGS"
AC_CHECK_HEADER([smmintrin.h],
[AC_DEFINE(WEBP_HAVE_SSE41, [1],
[Set to 1 if SSE4.1 is supported])],
[SSE41_FLAGS=""])
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([SSE41_FLAGS])])
AC_ARG_ENABLE([sse2],
AS_HELP_STRING([--disable-sse2],
[Disable detection of SSE2 support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_sse2" != "xno"], [
SSE2_FLAGS="$INTRINSICS_CFLAGS $SSE2_FLAGS"
TEST_AND_ADD_CFLAGS([SSE2_FLAGS], [-msse2])
AS_IF([test -n "$SSE2_FLAGS"], [
SAVED_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $SSE2_FLAGS"
AC_CHECK_HEADER([emmintrin.h],
[AC_DEFINE(WEBP_HAVE_SSE2, [1],
[Set to 1 if SSE2 is supported])],
[SSE2_FLAGS=""])
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([SSE2_FLAGS])])
dnl === CLEAR_LIBVARS([var_pfx])
dnl === Clears <var_pfx>_{INCLUDES,LIBS}.
AC_DEFUN([CLEAR_LIBVARS], [$1_INCLUDES=""; $1_LIBS=""])
@ -213,26 +93,6 @@ AC_DEFUN([LIBCHECK_EPILOGUE],
CPPFLAGS=$SAVED_CPPFLAGS
LIBS=$SAVED_LIBS])
dnl === Check for gcc builtins
dnl === CHECK_FOR_BUILTIN([builtin], [param], [define])
dnl === links a C AC_LANG_PROGRAM, with <builtin>(<param>)
dnl === AC_DEFINE'ing <define> if successful.
AC_DEFUN([CHECK_FOR_BUILTIN],
[AC_LANG_PUSH([C])
AC_MSG_CHECKING([for $1])
AC_LINK_IFELSE([AC_LANG_PROGRAM([], [$1($2)])],
[AC_MSG_RESULT([yes])
AC_DEFINE([$3], [1],
[Set to 1 if $1 is available])],
[AC_MSG_RESULT([no])]),
AC_LANG_POP])
dnl AC_CHECK_FUNC doesn't work with builtin's.
CHECK_FOR_BUILTIN([__builtin_bswap16], [1u << 15], [HAVE_BUILTIN_BSWAP16])
CHECK_FOR_BUILTIN([__builtin_bswap32], [1u << 31], [HAVE_BUILTIN_BSWAP32])
CHECK_FOR_BUILTIN([__builtin_bswap64], [1ull << 63], [HAVE_BUILTIN_BSWAP64])
dnl === Check for pthread support
AC_ARG_ENABLE([threading],
AS_HELP_STRING([--disable-threading],
@ -246,260 +106,213 @@ if test "$enable_threading" = "yes"; then
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
CC="$PTHREAD_CC"
],
[AC_CHECK_FUNC([_beginthreadex],
[AC_DEFINE([WEBP_USE_THREAD], [1],
[Undefine this to disable thread
support.])],
[enable_threading=no])])
[enable_threading=no])
fi
AC_MSG_NOTICE([checking if threading is enabled... ${enable_threading-no}])
dnl === check for OpenGL/GLUT support ===
CLEAR_LIBVARS([GL])
WITHLIB_OPTION([gl], [GL])
AC_ARG_ENABLE([gl], AS_HELP_STRING([--disable-gl],
[Disable detection of OpenGL support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_gl" != "xno"], [
CLEAR_LIBVARS([GL])
WITHLIB_OPTION([gl], [GL])
LIBCHECK_PROLOGUE([GL])
LIBCHECK_PROLOGUE([GL])
glut_cflags="none"
glut_ldflags="none"
case $host_os in
darwin*)
# Special case for OSX builds. Append these to give the user a chance to
# override with --with-gl*
glut_cflags="$glut_cflags|-framework GLUT -framework OpenGL"
glut_ldflags="$glut_ldflags|-framework GLUT -framework OpenGL"
;;
esac
glut_cflags="none"
glut_ldflags="none"
case $host_os in
darwin*)
# Special case for OSX builds. Append these to give the user a chance to
# override with --with-gl*
glut_cflags="$glut_cflags|-framework GLUT -framework OpenGL"
glut_ldflags="$glut_ldflags|-framework GLUT -framework OpenGL"
;;
GLUT_SAVED_CPPFLAGS="$CPPFLAGS"
SAVED_IFS="$IFS"
IFS="|"
for flag in $glut_cflags; do
# restore IFS immediately as the autoconf macros may need the default.
IFS="$SAVED_IFS"
unset ac_cv_header_GL_glut_h
unset ac_cv_header_OpenGL_glut_h
case $flag in
none) ;;
*) CPPFLAGS="$flag $CPPFLAGS";;
esac
AC_CHECK_HEADERS([GL/glut.h GLUT/glut.h OpenGL/glut.h],
[glut_headers=yes;
test "$flag" = "none" || GL_INCLUDES="$CPPFLAGS";
break])
CPPFLAGS="$GLUT_SAVED_CPPFLAGS"
test "$glut_headers" = "yes" && break
done
IFS="$SAVED_IFS"
GLUT_SAVED_CPPFLAGS="$CPPFLAGS"
if test "$glut_headers" = "yes"; then
AC_LANG_PUSH([C])
GLUT_SAVED_LDFLAGS="$LDFLAGS"
SAVED_IFS="$IFS"
IFS="|"
for flag in $glut_cflags; do
for flag in $glut_ldflags; do
# restore IFS immediately as the autoconf macros may need the default.
IFS="$SAVED_IFS"
unset ac_cv_header_GL_glut_h
unset ac_cv_header_OpenGL_glut_h
unset ac_cv_search_glBegin
case $flag in
none) ;;
*) CPPFLAGS="$flag $CPPFLAGS";;
*) LDFLAGS="$flag $LDFLAGS";;
esac
AC_CHECK_HEADERS([GL/glut.h GLUT/glut.h OpenGL/glut.h],
[glut_headers=yes;
test "$flag" = "none" || GL_INCLUDES="$CPPFLAGS";
break])
CPPFLAGS="$GLUT_SAVED_CPPFLAGS"
test "$glut_headers" = "yes" && break
# find libGL
GL_SAVED_LIBS="$LIBS"
AC_SEARCH_LIBS([glBegin], [GL OpenGL])
LIBS="$GL_SAVED_LIBS"
# A direct link to libGL may not be necessary on e.g., linux.
GLUT_SAVED_LIBS="$LIBS"
for lib in "" "-lglut" "-lglut $ac_cv_search_glBegin"; do
LIBS="$lib"
AC_LINK_IFELSE(
[AC_LANG_PROGRAM([
#ifdef __cplusplus
# define EXTERN_C extern "C"
#else
# define EXTERN_C
#endif
EXTERN_C char glOrtho();
EXTERN_C char glutMainLoop();
],[
glOrtho();
glutMainLoop();
])
],
[glut_support=yes], []
)
if test "$glut_support" = "yes"; then
GL_LIBS="$LDFLAGS $lib"
break
fi
done
LIBS="$GLUT_SAVED_LIBS"
LDFLAGS="$GLUT_SAVED_LDFLAGS"
test "$glut_support" = "yes" && break
done
IFS="$SAVED_IFS"
AC_LANG_POP
fi
if test "$glut_headers" = "yes"; then
AC_LANG_PUSH([C])
GLUT_SAVED_LDFLAGS="$LDFLAGS"
SAVED_IFS="$IFS"
IFS="|"
for flag in $glut_ldflags; do
# restore IFS immediately as the autoconf macros may need the default.
IFS="$SAVED_IFS"
unset ac_cv_search_glBegin
LIBCHECK_EPILOGUE([GL])
case $flag in
none) ;;
*) LDFLAGS="$flag $LDFLAGS";;
esac
# find libGL
GL_SAVED_LIBS="$LIBS"
AC_SEARCH_LIBS([glBegin], [GL OpenGL opengl32])
LIBS="$GL_SAVED_LIBS"
# A direct link to libGL may not be necessary on e.g., linux.
GLUT_SAVED_LIBS="$LIBS"
for lib in "" "-lglut" "-lglut $ac_cv_search_glBegin"; do
LIBS="$lib"
AC_LINK_IFELSE(
[AC_LANG_PROGRAM([
#ifdef __cplusplus
# define EXTERN_C extern "C"
#else
# define EXTERN_C
#endif
EXTERN_C char glOrtho();
EXTERN_C char glutMainLoop();
],[
glOrtho();
glutMainLoop();
])
],
AC_DEFINE(WEBP_HAVE_GL, [1],
[Set to 1 if OpenGL is supported])
[glut_support=yes], []
)
if test "$glut_support" = "yes"; then
GL_LIBS="$LDFLAGS $lib"
break
fi
done
LIBS="$GLUT_SAVED_LIBS"
LDFLAGS="$GLUT_SAVED_LDFLAGS"
test "$glut_support" = "yes" && break
done
IFS="$SAVED_IFS"
AC_LANG_POP
fi
LIBCHECK_EPILOGUE([GL])
if test "$glut_support" = "yes" -a "$enable_libwebpdemux" = "yes"; then
build_vwebp=yes
fi
])
if test "$glut_support" = "yes" -a "$enable_libwebpdemux" = "yes"; then
build_vwebp=yes
fi
AM_CONDITIONAL([BUILD_VWEBP], [test "$build_vwebp" = "yes"])
dnl === check for PNG support ===
AC_ARG_ENABLE([png], AS_HELP_STRING([--disable-png],
[Disable detection of PNG format support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_png" != "xno"], [
CLEAR_LIBVARS([PNG])
AC_PATH_PROGS([LIBPNG_CONFIG],
[libpng-config libpng16-config libpng15-config libpng14-config \
libpng12-config])
if test -n "$LIBPNG_CONFIG"; then
PNG_INCLUDES=`$LIBPNG_CONFIG --cflags`
PNG_LIBS="`$LIBPNG_CONFIG --ldflags`"
CLEAR_LIBVARS([PNG])
AC_PATH_PROGS(LIBPNG_CONFIG,
[libpng-config libpng15-config libpng14-config libpng12-config])
if test -n "$LIBPNG_CONFIG"; then
PNG_INCLUDES=`$LIBPNG_CONFIG --cflags`
PNG_PREFIX=`$LIBPNG_CONFIG --prefix`
if test "${PNG_PREFIX}/lib" != "/usr/lib" ; then
PNG_LIBS="-L${PNG_PREFIX}/lib"
fi
PNG_LIBS="$PNG_LIBS `$LIBPNG_CONFIG --libs`"
fi
WITHLIB_OPTION([png], [PNG])
WITHLIB_OPTION([png], [PNG])
LIBCHECK_PROLOGUE([PNG])
AC_CHECK_HEADER(png.h,
AC_SEARCH_LIBS(png_get_libpng_ver, [png],
[test "$ac_cv_search_png_get_libpng_ver" = "none required" \
|| PNG_LIBS="$PNG_LIBS $ac_cv_search_png_get_libpng_ver"
PNG_INCLUDES="$PNG_INCLUDES -DWEBP_HAVE_PNG"
AC_DEFINE(WEBP_HAVE_PNG, [1],
[Set to 1 if PNG library is installed])
png_support=yes
],
[AC_MSG_WARN(Optional png library not found)
PNG_LIBS=""
PNG_INCLUDES=""
],
[$MATH_LIBS]),
[AC_MSG_WARN(png library not available - no png.h)
PNG_LIBS=""
PNG_INCLUDES=""
],
)
LIBCHECK_EPILOGUE([PNG])
])
LIBCHECK_PROLOGUE([PNG])
AC_CHECK_HEADER(png.h,
AC_SEARCH_LIBS(png_get_libpng_ver, [png],
[test "$ac_cv_search_png_get_libpng_ver" = "none required" \
|| PNG_LIBS="$PNG_LIBS $ac_cv_search_png_get_libpng_ver"
PNG_INCLUDES="$PNG_INCLUDES -DWEBP_HAVE_PNG"
AC_DEFINE(WEBP_HAVE_PNG, [1],
[Set to 1 if PNG library is installed])
png_support=yes
],
[AC_MSG_WARN(Optional png library not found)
PNG_LIBS=""
PNG_INCLUDES=""
],
[$MATH_LIBS]),
[AC_MSG_WARN(png library not available - no png.h)
PNG_LIBS=""
PNG_INCLUDES=""
],
)
LIBCHECK_EPILOGUE([PNG])
dnl === check for JPEG support ===
AC_ARG_ENABLE([jpeg],
AS_HELP_STRING([--disable-jpeg],
[Disable detection of JPEG format support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_jpeg" != "xno"], [
CLEAR_LIBVARS([JPEG])
WITHLIB_OPTION([jpeg], [JPEG])
CLEAR_LIBVARS([JPEG])
WITHLIB_OPTION([jpeg], [JPEG])
LIBCHECK_PROLOGUE([JPEG])
AC_CHECK_HEADER(jpeglib.h,
AC_CHECK_LIB(jpeg, jpeg_set_defaults,
[JPEG_LIBS="$JPEG_LIBS -ljpeg"
JPEG_INCLUDES="$JPEG_INCLUDES -DWEBP_HAVE_JPEG"
AC_DEFINE(WEBP_HAVE_JPEG, [1],
[Set to 1 if JPEG library is installed])
jpeg_support=yes
],
AC_MSG_WARN(Optional jpeg library not found),
[$MATH_LIBS]),
AC_MSG_WARN(jpeg library not available - no jpeglib.h)
)
LIBCHECK_EPILOGUE([JPEG])
])
LIBCHECK_PROLOGUE([JPEG])
AC_CHECK_HEADER(jpeglib.h,
AC_CHECK_LIB(jpeg, jpeg_set_defaults,
[JPEG_LIBS="$JPEG_LIBS -ljpeg"
JPEG_INCLUDES="$JPEG_INCLUDES -DWEBP_HAVE_JPEG"
AC_DEFINE(WEBP_HAVE_JPEG, [1],
[Set to 1 if JPEG library is installed])
jpeg_support=yes
],
AC_MSG_WARN(Optional jpeg library not found),
[$MATH_LIBS]),
AC_MSG_WARN(jpeg library not available - no jpeglib.h)
)
LIBCHECK_EPILOGUE([JPEG])
dnl === check for TIFF support ===
AC_ARG_ENABLE([tiff],
AS_HELP_STRING([--disable-tiff],
[Disable detection of TIFF format support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_tiff" != "xno"], [
CLEAR_LIBVARS([TIFF])
WITHLIB_OPTION([tiff], [TIFF])
CLEAR_LIBVARS([TIFF])
WITHLIB_OPTION([tiff], [TIFF])
LIBCHECK_PROLOGUE([TIFF])
AC_CHECK_HEADER(tiffio.h,
AC_CHECK_LIB(tiff, TIFFGetVersion,
[TIFF_LIBS="$TIFF_LIBS -ltiff"
TIFF_INCLUDES="$TIFF_INCLUDES -DWEBP_HAVE_TIFF"
AC_DEFINE(WEBP_HAVE_TIFF, [1],
[Set to 1 if TIFF library is installed])
tiff_support=yes
],
AC_MSG_WARN(Optional tiff library not found),
[$MATH_LIBS]),
AC_MSG_WARN(tiff library not available - no tiffio.h)
)
LIBCHECK_EPILOGUE([TIFF])
])
LIBCHECK_PROLOGUE([TIFF])
AC_CHECK_HEADER(tiffio.h,
AC_CHECK_LIB(tiff, TIFFGetVersion,
[TIFF_LIBS="$TIFF_LIBS -ltiff"
TIFF_INCLUDES="$TIFF_INCLUDES -DWEBP_HAVE_TIFF"
AC_DEFINE(WEBP_HAVE_TIFF, [1],
[Set to 1 if TIFF library is installed])
tiff_support=yes
],
AC_MSG_WARN(Optional tiff library not found),
[$MATH_LIBS]),
AC_MSG_WARN(tiff library not available - no tiffio.h)
)
LIBCHECK_EPILOGUE([TIFF])
dnl === check for GIF support ===
AC_ARG_ENABLE([gif], AS_HELP_STRING([--disable-gif],
[Disable detection of GIF format support
@<:@default=auto@:>@]))
AS_IF([test "x$enable_gif" != "xno"], [
CLEAR_LIBVARS([GIF])
WITHLIB_OPTION([gif], [GIF])
CLEAR_LIBVARS([GIF])
WITHLIB_OPTION([gif], [GIF])
LIBCHECK_PROLOGUE([GIF])
AC_CHECK_HEADER(gif_lib.h,
AC_CHECK_LIB([gif], [DGifOpenFileHandle],
[GIF_LIBS="$GIF_LIBS -lgif"
AC_DEFINE(WEBP_HAVE_GIF, [1],
[Set to 1 if GIF library is installed])
gif_support=yes
],
AC_MSG_WARN(Optional gif library not found),
[$MATH_LIBS]),
AC_MSG_WARN(gif library not available - no gif_lib.h)
)
LIBCHECK_EPILOGUE([GIF])
LIBCHECK_PROLOGUE([GIF])
AC_CHECK_HEADER(gif_lib.h,
AC_CHECK_LIB([gif], [DGifOpenFileHandle],
[GIF_LIBS="$GIF_LIBS -lgif"
gif_support=yes
],
AC_MSG_WARN(Optional gif library not found),
[$MATH_LIBS]),
AC_MSG_WARN(gif library not available - no gif_lib.h)
)
LIBCHECK_EPILOGUE([GIF])
if test "$gif_support" = "yes" -a \
"$enable_libwebpdemux" = "yes"; then
build_animdiff=yes
fi
if test "$gif_support" = "yes" -a \
"$enable_libwebpmux" = "yes"; then
build_gif2webp=yes
fi
])
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_animdiff}" = "yes"])
if test "$gif_support" = "yes" -a \
"$enable_libwebpmux" = "yes"; then
build_gif2webp=yes
fi
AM_CONDITIONAL([BUILD_GIF2WEBP], [test "${build_gif2webp}" = "yes"])
dnl === check for WIC support ===
AC_ARG_ENABLE([wic],
AS_HELP_STRING([--disable-wic],
[Disable Windows Imaging Component (WIC) detection.
@<:@default=auto@:>@]),,
[enable_wic=yes])
case $host_os in
mingw*)
if test "$enable_wic" = "yes"; then
if test "$target_os" = "mingw32"; then
AC_CHECK_HEADERS([wincodec.h shlwapi.h windows.h])
if test "$ac_cv_header_wincodec_h" = "yes"; then
AC_MSG_CHECKING(for Windows Imaging Component support)
@ -539,20 +352,6 @@ if test "$enable_wic" = "yes"; then
AC_MSG_RESULT(${wic_support-no})
fi
fi
esac
dnl === If --enable-aligned is defined, define WEBP_FORCE_ALIGNED
AC_MSG_CHECKING(if --enable-aligned option is specified)
AC_ARG_ENABLE([aligned],
AS_HELP_STRING([--enable-aligned],
[Force aligned memory operations in non-dsp code
(may be slower)]))
if test "$enable_aligned" = "yes"; then
AC_DEFINE(WEBP_FORCE_ALIGNED, [1],
[Define to 1 to force aligned memory operations])
fi
AC_MSG_RESULT(${enable_aligned-no})
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
@ -604,26 +403,17 @@ AC_ARG_ENABLE([libwebpdecoder],
AC_MSG_RESULT(${enable_libwebpdecoder-no})
AM_CONDITIONAL([BUILD_LIBWEBPDECODER], [test "$enable_libwebpdecoder" = "yes"])
dnl === Check whether libwebpextras should be built
AC_MSG_CHECKING(whether libwebpextras is to be built)
AC_ARG_ENABLE([libwebpextras],
AS_HELP_STRING([--enable-libwebpextras],
[Build libwebpextras @<:@default=no@:>@]))
AC_MSG_RESULT(${enable_libwebpextras-no})
AM_CONDITIONAL([WANT_EXTRAS], [test "$enable_libwebpextras" = "yes"])
dnl =========================
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([src/webp/config.h])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([Makefile src/Makefile man/Makefile \
examples/Makefile src/dec/Makefile \
src/enc/Makefile src/dsp/Makefile \
src/demux/Makefile src/mux/Makefile \
src/utils/Makefile src/extras/Makefile \
src/utils/Makefile \
src/libwebp.pc src/libwebpdecoder.pc \
src/demux/libwebpdemux.pc src/mux/libwebpmux.pc \
src/extras/libwebpextras.pc])
src/demux/libwebpdemux.pc src/mux/libwebpmux.pc])
AC_OUTPUT
@ -634,12 +424,11 @@ WebP Configuration Summary
Shared libraries: ${enable_shared}
Static libraries: ${enable_static}
Threading support: ${enable_threading-no}
Threaded decode: ${enable_threading-no}
libwebp: yes
libwebpdecoder: ${enable_libwebpdecoder-no}
libwebpdemux: ${enable_libwebpdemux-no}
libwebpmux: ${enable_libwebpmux-no}
libwebpextras: ${enable_libwebpextras-no}
Tools:
cwebp : yes
@ -654,9 +443,7 @@ dwebp : yes
=====================
PNG : ${png_support-no}
WIC : ${wic_support-no}
GIF support : ${gif_support-no}
anim_diff : ${build_animdiff-no}
gif2webp : ${build_gif2webp-no}
webpmux : ${enable_libwebpmux-no}
vwebp : ${build_vwebp-no}
gif2webp : ${build_gif2webp-no}
webpmux : ${enable_libwebpmux-no}
vwebp : ${build_vwebp-no}
])
@ -46,16 +46,25 @@ for:
* **Animation.** An image may have multiple frames with pauses between them,
making it an animation.
* **Image Fragmentation.** A single bitstream in WebP has an inherent
limitation for width or height of 2^14 pixels, and, when using VP8, a 512
KiB limit on the size of the first compressed partition. To support larger
images, the format supports images that are composed of multiple fragments,
each encoded as a separate bitstream. All fragments logically form a single
image: they have common metadata, color profile, etc. Image fragmentation
may also improve efficiency for larger images, e.g., grass can be encoded
differently than sky.
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in [RFC 2119][].
Bit numbering in chunk diagrams starts at `0` for the most significant bit
('MSB 0') as described in [RFC 1166][].
**Note:** Out of the features mentioned above, lossy compression, lossless
compression, transparency, metadata, color profile and animation are finalized
and are to be considered stable.
and are to be considered stable. On the other hand, image fragmentation is
experimental as of now, and is open to discussion, feedback and comments.
The same is indicated using annotation "_status: experimental_" in the relevant
sections of this document.
Terminology &amp; Basics
------------------------
@ -70,7 +79,7 @@ Below are additional terms used throughout this document:
_Reader/Writer_
: Code that reads WebP files is referred to as a _reader_, while code that
writes them is referred to as a _writer_.
writes them is referred to as a _writer_.
_uint16_
@ -92,12 +101,10 @@ _FourCC_
_1-based_
: An unsigned integer field storing values offset by `-1`. e.g., Such a field
would store value _25_ as _24_.
would store value _25_ as _24_.
RIFF File Format
RIFF file format
----------------
The WebP file format is based on the RIFF (resource interchange file format)
document format.
@ -137,8 +144,7 @@ _ChunkHeader('ABCD')_
chunks that apply to any RIFF file format, while FourCCs specific to a file
format are all lowercase. WebP does not follow this convention.
WebP File Header
WebP file header
----------------
0 1 2 3
@ -158,8 +164,8 @@ WebP File Header
File Size: 32 bits (_uint32_)
: The size of the file in bytes starting at offset 8. The maximum value of
this field is 2^32 minus 10 bytes and thus the size of the whole file is at
most 4GiB minus 2 bytes.
this field is 2^32 minus 10 bytes and thus the size of the whole file is at
most 4GiB minus 2 bytes.
'WEBP': 32 bits
@ -171,8 +177,7 @@ the 'WEBP' FourCC. The file SHOULD NOT contain anything after it. As the size
of any chunk is even, the size given by the RIFF header is also even. The
contents of individual chunks will be described in the following sections.
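A minimal sketch of walking this layout, assuming the whole file is already in
memory; `ReadLE32` and `ListChunks` are illustrative helpers, not part of the
format:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t ReadLE32(const uint8_t* const p) {  // chunk sizes are little-endian
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
         ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

// Prints the FourCC and payload size of every top-level chunk.
// Returns 0 if 'data' does not start with a RIFF/WEBP header.
static int ListChunks(const uint8_t* data, size_t data_size) {
  size_t offset = 12;                      // skip 'RIFF', File Size and 'WEBP'
  if (data_size < 12 ||
      memcmp(data, "RIFF", 4) != 0 || memcmp(data + 8, "WEBP", 4) != 0) {
    return 0;
  }
  while (offset + 8 <= data_size) {
    const uint32_t payload_size = ReadLE32(data + offset + 4);
    printf("chunk '%.4s': %u payload bytes\n",
           (const char*)(data + offset), payload_size);
    offset += 8 + payload_size + (payload_size & 1);  // payloads are even-padded
  }
  return 1;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~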
Simple File Format (Lossy)
Simple file format (lossy)
--------------------------
This layout SHOULD be used if the image requires _lossy_ encoding and does not
@ -210,8 +215,7 @@ width and height. That is assumed to be the width and height of the canvas.
The VP8 specification describes how to decode the image into Y'CbCr
format. To convert to RGB, Rec. 601 SHOULD be used.
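A floating-point sketch of that conversion for one studio-swing Y'CbCr sample
(Y nominally in 16..235, Cb/Cr centered at 128), using commonly rounded
Rec. 601 coefficients; production decoders typically use a fixed-point variant:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdint.h>

static void YCbCrToRGB(int y, int cb, int cr, uint8_t rgb[3]) {
  const double yd = 1.164 * (y - 16);
  const double r = yd + 1.596 * (cr - 128);
  const double g = yd - 0.813 * (cr - 128) - 0.391 * (cb - 128);
  const double b = yd + 2.018 * (cb - 128);
  rgb[0] = (uint8_t)(r < 0. ? 0. : r > 255. ? 255. : r + 0.5);  // clamp + round
  rgb[1] = (uint8_t)(g < 0. ? 0. : g > 255. ? 255. : g + 0.5);
  rgb[2] = (uint8_t)(b < 0. ? 0. : b > 255. ? 255. : b + 0.5);
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~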
Simple File Format (Lossless)
Simple file format (lossless)
-----------------------------
**Note:** Older readers may not support files using the lossless format.
@ -249,8 +253,7 @@ The current specification of the VP8L bitstream can be found at
contains the VP8L image width and height. That is assumed to be the width
and height of the canvas.
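A sketch of pulling those dimensions out of the first five bytes of the 'VP8L'
payload; the 14-bit fields are defined by the lossless bitstream specification,
while the helper name is ours:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdint.h>

// 'header' holds the 0x2F signature byte followed by four bytes whose bits,
// packed least-significant-first, contain two 14-bit "minus one" dimensions.
static int VP8LGetCanvasSize(const uint8_t header[5],
                             int* const width, int* const height) {
  const uint32_t bits = (uint32_t)header[1] | ((uint32_t)header[2] << 8) |
                        ((uint32_t)header[3] << 16) | ((uint32_t)header[4] << 24);
  if (header[0] != 0x2F) return 0;        // not a VP8L bitstream
  *width  = 1 + (int)(bits & 0x3FFF);
  *height = 1 + (int)((bits >> 14) & 0x3FFF);
  return 1;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~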
Extended File Format
Extended file format
--------------------
**Note:** Older readers may not support files using the extended format.
@ -271,15 +274,13 @@ An extended format file consists of:
* An optional list of [unknown chunks](#unknown-chunks). _\[status: experimental\]_
For a _still image_, the _image data_ consists of a single frame, which is made
up of:
For a _still image_, the _image data_ consists of a single frame, whereas for
an _animated image_, it consists of multiple frames. More details about frames
can be found in the [Animation](#animation) section.
* An optional [alpha subchunk](#alpha).
* A [bitstream subchunk](#bitstream-vp8vp8l).
For an _animated image_, the _image data_ consists of multiple frames. More
details about frames can be found in the [Animation](#animation) section.
Moreover, each frame can be fragmented or non-fragmented, as will be described
in the [Extended WebP file header](#extended_header) section. More details about
fragments can be found in the [Fragments](#fragments) section.
All chunks SHOULD be placed in the same order as listed above. If a chunk
appears in the wrong place, the file is invalid, but readers MAY parse the
@ -301,7 +302,7 @@ Extended WebP file header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ChunkHeader('VP8X') |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Rsv|I|L|E|X|A|R| Reserved |
|Rsv|I|L|E|X|A|F| Reserved |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Canvas Width Minus One | ...
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@ -319,7 +320,7 @@ ICC profile (I): 1 bit
Alpha (L): 1 bit
: Set if any of the frames of the image contain transparency information
("alpha").
("alpha").
EXIF metadata (E): 1 bit
@ -332,11 +333,11 @@ XMP metadata (X): 1 bit
Animation (A): 1 bit
: Set if this is an animated image. Data in 'ANIM' and 'ANMF' chunks should be
used to control the animation.
used to control the animation.
Reserved (R): 1 bit
Image Fragmentation (F): 1 bit _\[status: experimental\]_
: SHOULD be `0`.
: Set if any of the frames in the image are represented by fragments.
Reserved: 24 bits
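Putting these fields together, a rough sketch of decoding a 10-byte 'VP8X'
payload; the helper and variable names are ours, and the bit positions treat
the flag byte as the low byte of the little-endian flags field:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdint.h>

static void ParseVP8X(const uint8_t p[10], int* const has_alpha,
                      int* const has_animation,
                      uint32_t* const canvas_width,
                      uint32_t* const canvas_height) {
  *has_alpha     = (p[0] >> 4) & 1;   // the 'L' bit
  *has_animation = (p[0] >> 1) & 1;   // the 'A' bit
  // Bytes 1..3 are reserved, then two little-endian 24-bit "minus one" fields.
  *canvas_width  =
      1u + ((uint32_t)p[4] | ((uint32_t)p[5] << 8) | ((uint32_t)p[6] << 16));
  *canvas_height =
      1u + ((uint32_t)p[7] | ((uint32_t)p[8] << 8) | ((uint32_t)p[9] << 16));
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~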
@ -381,20 +382,13 @@ animation.
Background Color: 32 bits (_uint32_)
: The default background color of the canvas in \[Blue, Green, Red, Alpha\]
byte order. This color MAY be used to fill the unused space on the canvas
around the frames, as well as the transparent pixels of the first frame.
Background color is also used when disposal method is `1`.
byte order. This color is used to fill the unused space on the canvas around the
frames, as well as the transparent pixels of the first frame. Background color
is also used when disposal method is `1`.
**Note**:
* Background color MAY contain a transparency value (alpha), even if the
_Alpha_ flag in [VP8X chunk](#extended_header) is unset.
* Viewer applications SHOULD treat the background color value as a hint, and
are not required to use it.
* The canvas is cleared at the start of each loop. The background color MAY be
used to achieve this.
**Note**: Viewers that have a preferred background against which to present the
images (web browsers, for example) should ignore this value and use their
preferred background color instead.
Loop Count: 16 bits (_uint16_)
@ -404,6 +398,7 @@ This chunk MUST appear if the _Animation_ flag in the VP8X chunk is set.
If the _Animation_ flag is not set and this chunk is present, it
SHOULD be ignored.
ANMF chunk:
For animated images, this chunk contains information about a _single_ frame.
@ -420,7 +415,7 @@ If the _Animation flag_ is not set, then this chunk SHOULD NOT be present.
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
... | Frame Height Minus One |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Frame Duration | Reserved |B|D|
| Frame Duration | Reserved |D|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Frame Data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@ -446,37 +441,27 @@ Frame Height Minus One: 24 bits (_uint24_)
Frame Duration: 24 bits (_uint24_)
: The time to wait before displaying the next frame, in 1 millisecond units.
In particular, frame duration of 0 is useful when one wants to update
multiple areas of the canvas at once during the animation.
In particular, frame duration of 0 is useful when one wants to update multiple
areas of the canvas at once during the animation.
Reserved: 6 bits
Reserved: 7 bits
: SHOULD be 0.
Blending method (B): 1 bit
: Indicates how transparent pixels of _the current frame_ are to be blended
with corresponding pixels of the previous canvas:
* `0`: Use alpha blending. After disposing of the previous frame, render the
current frame on the canvas using [alpha-blending](#alpha-blending). If
the current frame does not have an alpha channel, assume alpha value of
255, effectively replacing the rectangle.
* `1`: Do not blend. After disposing of the previous frame, render the
current frame on the canvas by overwriting the rectangle covered by the
current frame.
Disposal method (D): 1 bit
: Indicates how _the current frame_ is to be treated after it has been
displayed (before rendering the next frame) on the canvas:
: Indicates how _the current frame_ is to be treated after it has been displayed
(before rendering the next frame) on the canvas:
* `0`: Do not dispose. Leave the canvas as is.
* `0`: Do not dispose. Leave the canvas as is.
* `1`: Dispose to background color. Fill the _rectangle_ on the canvas
covered by the _current frame_ with background color specified in the
[ANIM chunk](#anim_chunk).
* `1`: Dispose to background color. Fill the _rectangle_ on the canvas covered
by the _current frame_ with background color specified in the
[ANIM chunk](#anim_chunk).
After disposing the current frame, render the next frame on the canvas using
[alpha-blending](#alpha-blending). If the next frame does not have an alpha
channel, assume alpha value of 255, effectively replacing the rectangle.
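Alpha blending here is the usual 'source over' operator on non-premultiplied
pixels; a per-pixel sketch with components normalized to the range 0..1 (the
helper name is ours):

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Composites one non-premultiplied RGBA source pixel over the canvas pixel.
static void BlendPixel(const double src[4], double dst[4]) {
  const double src_a = src[3];
  const double dst_a = dst[3];
  const double out_a = src_a + dst_a * (1. - src_a);
  int i;
  for (i = 0; i < 3; ++i) {
    dst[i] = (out_a > 0.)
           ? (src[i] * src_a + dst[i] * dst_a * (1. - src_a)) / out_a
           : 0.;
  }
  dst[3] = out_a;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~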
**Notes**:
@ -507,7 +492,9 @@ Disposal method (D): 1 bit
Frame Data: _Chunk Size_ - `16` bytes
: Consists of:
: For a fragmented frame, it consists of multiple [fragment chunks](#fragments).
: For a non-fragmented frame, it consists of:
* An optional [alpha subchunk](#alpha) for the frame.
@ -518,6 +505,49 @@ Frame Data: _Chunk Size_ - `16` bytes
**Note**: The 'ANMF' payload, _Frame Data_ above, consists of individual
_padded_ chunks as described by the [RIFF file format](#riff-file-format).
#### Fragments _\[status: experimental\]_
For images that are represented by fragments, this chunk contains data for
a single fragment. If the _Image Fragmentation Flag_ is not set, then this chunk
SHOULD NOT be present.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ChunkHeader('FRGM') |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Fragment X | ...
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
... Fragment Y | Fragment Data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Fragment X: 24 bits (_uint24_)
: The X coordinate of the upper left corner of the fragment is `Fragment X * 2`
Fragment Y: 24 bits (_uint24_)
: The Y coordinate of the upper left corner of the fragment is `Fragment Y * 2`
Fragment Data: _Chunk Size_ - `6` bytes
: It contains:
* An optional [alpha subchunk](#alpha) for the fragment.
* The [bitstream subchunk](#bitstream-vp8vp8l) for the fragment.
* An optional list of [unknown chunks](#unknown-chunks).
Note: The width and height of the fragment are obtained from the bitstream
subchunk.
The fragments of a frame SHOULD have the following properties:
* They collectively cover the whole frame.
* No pair of fragments have any overlapping region on the frame.
* No portion of any fragment should be located outside of the canvas.
#### Alpha
0 1 2 3
@ -535,20 +565,20 @@ Reserved (Rsv): 2 bits
Pre-processing (P): 2 bits
: These INFORMATIVE bits are used to signal the pre-processing that has
been performed during compression. The decoder can use this information to
e.g. dither the values or smooth the gradients prior to display.
been performed during compression. The decoder can use this information to
e.g. dither the values or smooth the gradients prior to display.
* `0`: no pre-processing
* `1`: level reduction
* `0`: no pre-processing
* `1`: level reduction
Filtering method (F): 2 bits
: The filtering method used:
* `0`: None.
* `1`: Horizontal filter.
* `2`: Vertical filter.
* `3`: Gradient filter.
* `0`: None.
* `1`: Horizontal filter.
* `2`: Vertical filter.
* `3`: Gradient filter.
For each pixel, filtering is performed using the following calculations.
Assume the alpha values surrounding the current `X` position are labeled as:
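As a sketch of the reconstruction, assuming `A` is the value to the left of
`X`, `B` the value above it, and `C` the value above and to the left (with
out-of-image neighbors taken as zero), the decoder adds the stored difference
to a per-method prediction:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdint.h>

static uint8_t ReconstructAlpha(int method, uint8_t delta, int A, int B, int C) {
  int predictor = 0;                       // method 0: none
  if (method == 1) {                       // horizontal
    predictor = A;
  } else if (method == 2) {                // vertical
    predictor = B;
  } else if (method == 3) {                // gradient, clamped to 0..255
    predictor = A + B - C;
    if (predictor < 0) predictor = 0;
    if (predictor > 255) predictor = 255;
  }
  return (uint8_t)((predictor + delta) & 0xff);  // addition is modulo 256
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~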
@ -592,15 +622,15 @@ Compression method (C): 2 bits
: The compression method used:
* `0`: No compression.
* `1`: Compressed using the WebP lossless format.
* `0`: No compression.
* `1`: Compressed using the WebP lossless format.
Alpha bitstream: _Chunk Size_ - `1` bytes
: Encoded alpha bitstream.
This optional chunk contains encoded alpha data for this frame. A frame
containing a 'VP8L' chunk SHOULD NOT contain this chunk.
This optional chunk contains encoded alpha data for this frame/fragment. A
frame/fragment containing a 'VP8L' chunk SHOULD NOT contain this chunk.
**Rationale**: The transparency information is already part of the 'VP8L'
chunk.
@ -631,15 +661,15 @@ compression method is '0') or compressed using the lossless format
#### Bitstream (VP8/VP8L)
This chunk contains compressed bitstream data for a single frame.
This chunk contains compressed bitstream data for a single frame/fragment.
A bitstream chunk may be either (i) a VP8 chunk, using "VP8 " (note the
significant fourth-character space) as its tag _or_ (ii) a VP8L chunk, using
"VP8L" as its tag.
The formats of VP8 and VP8L chunks are as described in sections
[Simple File Format (Lossy)](#simple-file-format-lossy)
and [Simple File Format (Lossless)](#simple-file-format-lossless) respectively.
[Simple file format (lossy)](#simple-file-format-lossy)
and [Simple file format (lossless)](#simple-file-format-lossless) respectively.
#### Color profile
@ -687,6 +717,7 @@ EXIF Metadata: _Chunk Size_ bytes
: image metadata in EXIF format.
XMP chunk:
0 1 2 3
@ -717,17 +748,47 @@ A file MAY contain unknown chunks:
* At the end of the file as described in [Extended WebP file
header](#extended_header) section.
* At the end of ANMF chunks as described in the
[Animation](#animation) section.
* At the end of FRGM and ANMF chunks as described in [Fragments](#fragments)
and [Animation](#animation) sections.
Readers SHOULD ignore these chunks. Writers SHOULD preserve them in their
original order (unless they specifically intend to modify these chunks).
### Assembling the Canvas from frames
### Assembling the Canvas from fragments/frames
Here we provide an overview of how a reader should assemble a canvas in the
case of an animated image. The notation _VP8X.field_ means the field in the
'VP8X' chunk with the same description.
Here we provide an overview of how a reader should assemble a canvas in case
of a fragmented-image and in case of an animated image. The notation
_VP8X.field_ means the field in the 'VP8X' chunk with the same description.
Displaying a _fragmented image_ canvas MUST be equivalent to the following
pseudocode: _\[status: experimental\]_
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert VP8X.flags.hasFragments
canvas ← new black image of size VP8X.canvasWidth x VP8X.canvasHeight.
frgm_params ← nil
for chunk in image_data:
assert chunk.tag is "FRGM"
frgm_params.fragmentX = Fragment X
frgm_params.fragmentY = Fragment Y
for subchunk in 'Fragment Data':
if subchunk.tag == "ALPH":
assert alpha subchunks not found in 'Fragment Data' earlier
frgm_params.alpha = alpha_data
else if subchunk.tag == "VP8 " OR subchunk.tag == "VP8L":
assert bitstream subchunks not found in 'Fragment Data' earlier
frgm_params.bitstream = bitstream_data
frgm_params.fragmentWidth = Width extracted from bitstream subchunk
frgm_params.fragmentHeight = Height extracted from bitstream subchunk
assert VP8X.canvasWidth >=
frgm_params.fragmentX + frgm_params.fragmentWidth
assert VP8X.canvasHeight >=
frgm_params.fragmentY + frgm_params.fragmentHeight
assert fragment has the properties mentioned in "Image Fragments" section.
render fragment with frgm_params.alpha and frgm_params.bitstream on canvas
with top-left corner at (frgm_params.fragmentX, frgm_params.fragmentY).
canvas contains the decoded canvas.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Displaying an _animated image_ canvas MUST be equivalent to the following
pseudocode:
@ -735,25 +796,28 @@ pseudocode:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert VP8X.flags.hasAnimation
canvas ← new image of size VP8X.canvasWidth x VP8X.canvasHeight with
background color ANIM.background_color.
background color ANIM.background_color.
loop_count ← ANIM.loopCount
dispose_method ← ANIM.disposeMethod
if loop_count == 0:
loop_count = ∞
frame_params ← nil
assert next chunk in image_data is ANMF
for loop = 0..loop_count - 1
clear canvas to ANIM.background_color or application defined color
until eof or non-ANMF chunk
frame_params.frameX = Frame X
frame_params.frameY = Frame Y
frame_params.frameWidth = Frame Width Minus One + 1
frame_params.frameHeight = Frame Height Minus One + 1
frame_params.frameDuration = Frame Duration
frame_right = frame_params.frameX + frame_params.frameWidth
frame_bottom = frame_params.frameY + frame_params.frameHeight
assert VP8X.canvasWidth >= frame_right
assert VP8X.canvasHeight >= frame_bottom
for loop = 0, ..., loop_count - 1
assert next chunk in image_data is ANMF
frame_params.frameX = Frame X
frame_params.frameY = Frame Y
frame_params.frameWidth = Frame Width Minus One + 1
frame_params.frameHeight = Frame Height Minus One + 1
frame_params.frameDuration = Frame Duration
assert VP8X.canvasWidth >= frame_params.frameX + frame_params.frameWidth
assert VP8X.canvasHeight >= frame_params.frameY + frame_params.frameHeight
if VP8X.flags.hasFragments and first subchunk in 'Frame Data' is FRGM
// Fragmented frame.
frame_params.{bitstream,alpha} = canvas decoded from subchunks in
'Frame Data' as per the pseudocode for
_fragmented image_ above.
else
// Non-fragmented frame.
for subchunk in 'Frame Data':
if subchunk.tag == "ALPH":
assert alpha subchunks not found in 'Frame Data' earlier
@ -761,15 +825,14 @@ for loop = 0..loop_count - 1
else if subchunk.tag == "VP8 " OR subchunk.tag == "VP8L":
assert bitstream subchunks not found in 'Frame Data' earlier
frame_params.bitstream = bitstream_data
render frame with frame_params.alpha and frame_params.bitstream on
canvas with top-left corner at (frame_params.frameX,
frame_params.frameY), using dispose method dispose_method.
canvas contains the decoded image.
Show the contents of the canvas for frame_params.frameDuration * 1ms.
render frame with frame_params.alpha and frame_params.bitstream on canvas
with top-left corner in (frame_params.frameX, frame_params.frameY), using
dispose method dispose_method.
Show the contents of the image for frame_params.frameDuration * 1ms.
canvas contains the decoded canvas.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Example File Layouts
Example file layouts
--------------------
A lossy encoded image with alpha may look as follows:
@ -801,6 +864,17 @@ RIFF/WEBP
+- XMP (metadata)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A fragmented image may look as follows:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RIFF/WEBP
+- VP8X (descriptions of features used)
+- FRGM (fragment1 parameters + data)
+- FRGM (fragment2 parameters + data)
+- FRGM (fragment3 parameters + data)
+- FRGM (fragment4 parameters + data)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An animated image with EXIF metadata may look as follows:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -815,8 +889,7 @@ RIFF/WEBP
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[vp8spec]: http://tools.ietf.org/html/rfc6386
[webpllspec]: https://chromium.googlesource.com/webm/libwebp/+/master/doc/webp-lossless-bitstream-spec.txt
[webpllspec]: https://gerrit.chromium.org/gerrit/gitweb?p=webm/libwebp.git;a=blob;f=doc/webp-lossless-bitstream-spec.txt;hb=master
[iccspec]: http://www.color.org/icc_specs2.xalter
[metadata]: http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
[rfc 1166]: http://tools.ietf.org/html/rfc1166
[rfc 2119]: http://tools.ietf.org/html/rfc2119
@ -14,7 +14,6 @@ Specification for WebP Lossless Bitstream
_Jyrki Alakuijala, Ph.D., Google, Inc., 2012-06-19_
Paragraphs marked as \[AMENDED\] were amended on 2014-09-16.
Abstract
--------
@ -173,8 +172,8 @@ It should be set to 0 when all alpha values are 255 in the picture, and
int alpha_is_used = ReadBits(1);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The version_number is a 3 bit code that must be set to 0. Any other value
should be treated as an error. \[AMENDED\]
The version_number is a 3 bit code that must be discarded by the decoder
at this time. Complying encoders write a 3-bit value 0.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
int version_number = ReadBits(3);
@ -331,7 +330,7 @@ uint32 Select(uint32 L, uint32 T, uint32 TL) {
abs(pGreen - GREEN(T)) + abs(pBlue - BLUE(T));
// Return either left or top, the one closer to the prediction.
if (pL < pT) { // \[AMENDED\]
if (pL <= pT) {
return L;
} else {
return T;
@ -543,9 +542,6 @@ color.
argb = color_table[GREEN(argb)];
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the index is equal to or larger than color_table_size, the argb color value
should be set to 0x00000000 (transparent black). \[AMENDED\]
When the color table is small (equal to or less than 16 colors), several
pixels are bundled into a single pixel. The pixel bundling packs several
(2, 4, or 8) pixels into a single pixel, reducing the image width
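A sketch of the resulting width reduction (the helper names are ours):

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Number of pixels packed into one green sample for a given palette size.
static int BundleWidthBits(int color_table_size) {
  if (color_table_size <= 2)  return 3;   // 8 pixels of 1 bit each
  if (color_table_size <= 4)  return 2;   // 4 pixels of 2 bits each
  if (color_table_size <= 16) return 1;   // 2 pixels of 4 bits each
  return 0;                               // no bundling
}

// Width of the bundled image, rounded up.
static int BundledWidth(int width, int color_table_size) {
  const int xbits = BundleWidthBits(color_table_size);
  return (width + (1 << xbits) - 1) >> xbits;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~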
@ -1,71 +0,0 @@
LOCAL_PATH := $(call my-dir)
################################################################################
# libexample_util
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
example_util.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_MODULE := example_util
include $(BUILD_STATIC_LIBRARY)
################################################################################
# cwebp
include $(CLEAR_VARS)
# Note: to enable jpeg/png encoding the sources from AOSP can be used with
# minor modification to their Android.mk files.
LOCAL_SRC_FILES := \
cwebp.c \
jpegdec.c \
metadata.c \
pngdec.c \
tiffdec.c \
webpdec.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_STATIC_LIBRARIES := example_util webp
LOCAL_MODULE := cwebp
include $(BUILD_EXECUTABLE)
################################################################################
# dwebp
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
dwebp.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_STATIC_LIBRARIES := example_util webp
LOCAL_MODULE := dwebp
include $(BUILD_EXECUTABLE)
################################################################################
# webpmux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
webpmux.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_STATIC_LIBRARIES := example_util webpmux webp
LOCAL_MODULE := webpmux_example
include $(BUILD_EXECUTABLE)
@ -1,4 +1,4 @@
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
AM_CPPFLAGS = -I$(top_srcdir)/src
bin_PROGRAMS = dwebp cwebp
if BUILD_VWEBP
@ -14,17 +14,7 @@ endif
noinst_LTLIBRARIES = libexampleutil.la
libexampleutil_la_SOURCES = example_util.c example_util.h stopwatch.h
if BUILD_ANIMDIFF
noinst_PROGRAMS = anim_diff
endif
anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES)
anim_diff_LDADD = ../src/demux/libwebpdemux.la
anim_diff_LDADD += libexampleutil.la
anim_diff_LDADD += $(GIF_LIBS) -lm
libexampleutil_la_SOURCES = example_util.c example_util.h
dwebp_SOURCES = dwebp.c stopwatch.h
dwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
@ -35,14 +25,12 @@ cwebp_SOURCES = cwebp.c metadata.c metadata.h stopwatch.h
cwebp_SOURCES += jpegdec.c jpegdec.h
cwebp_SOURCES += pngdec.c pngdec.h
cwebp_SOURCES += tiffdec.c tiffdec.h
cwebp_SOURCES += webpdec.c webpdec.h
cwebp_SOURCES += wicdec.c wicdec.h
cwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
cwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
cwebp_LDADD = libexampleutil.la ../src/libwebp.la
cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
cwebp_LDADD = ../src/libwebp.la $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
gif2webp_SOURCES = gif2webp.c
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
gif2webp_LDADD = libexampleutil.la ../src/mux/libwebpmux.la ../src/libwebp.la
gif2webp_LDADD += $(GIF_LIBS)
@ -56,11 +44,9 @@ vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
vwebp_LDADD = libexampleutil.la ../src/demux/libwebpdemux.la $(GL_LIBS)
if BUILD_LIBWEBPDECODER
anim_diff_LDADD += ../src/libwebpdecoder.la
dwebp_LDADD += ../src/libwebpdecoder.la
vwebp_LDADD += ../src/libwebpdecoder.la
else
anim_diff_LDADD += ../src/libwebp.la
dwebp_LDADD += ../src/libwebp.la
vwebp_LDADD += ../src/libwebp.la
endif
@ -1,220 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Checks if given pair of animated GIF/WebP images are identical:
// That is: their reconstructed canvases match pixel-by-pixel and their other
// animation properties (loop count etc) also match.
//
// example: anim_diff foo.gif bar.webp
#include <limits.h>
#include <stdio.h>
#include <stdlib.h> // for 'strtod'.
#include <string.h> // for 'strcmp'.
#include "./anim_util.h"
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
// Returns true if 'a + b' will overflow.
static int AdditionWillOverflow(int a, int b) {
return (b > 0) && (a > INT_MAX - b);
}
// Minimize number of frames by combining successive frames that have exact same
// ARGB data into a single longer duration frame.
static void MinimizeAnimationFrames(AnimatedImage* const img) {
uint32_t i;
for (i = 1; i < img->num_frames; ++i) {
DecodedFrame* const frame1 = &img->frames[i - 1];
DecodedFrame* const frame2 = &img->frames[i];
const uint8_t* const rgba1 = frame1->rgba;
const uint8_t* const rgba2 = frame2->rgba;
// If merging frames will result in integer overflow for 'duration',
// skip merging.
if (AdditionWillOverflow(frame1->duration, frame2->duration)) continue;
if (!memcmp(rgba1, rgba2, img->canvas_width * 4 * img->canvas_height)) {
// Merge 'i+1'th frame into 'i'th frame.
frame1->duration += frame2->duration;
if (i + 1 < img->num_frames) {
memmove(&img->frames[i], &img->frames[i + 1],
(img->num_frames - i - 1) * sizeof(*img->frames));
}
--img->num_frames;
--i;
}
}
}
static int CompareValues(uint32_t a, uint32_t b, const char* output_str) {
if (a != b) {
fprintf(stderr, "%s: %d vs %d\n", output_str, a, b);
return 0;
}
return 1;
}
// Note: As long as frame durations and reconstructed frames are identical, it
// is OK for other aspects like offsets, dispose/blend method to vary.
static int CompareAnimatedImagePair(const AnimatedImage* const img1,
const AnimatedImage* const img2,
int premultiply,
double min_psnr) {
int ok = 1;
const int is_multi_frame_image = (img1->num_frames > 1);
uint32_t i;
ok = CompareValues(img1->canvas_width, img2->canvas_width,
"Canvas width mismatch") && ok;
ok = CompareValues(img1->canvas_height, img2->canvas_height,
"Canvas height mismatch") && ok;
ok = CompareValues(img1->num_frames, img2->num_frames,
"Frame count mismatch") && ok;
if (!ok) return 0; // These are fatal failures, can't proceed.
if (is_multi_frame_image) { // Checks relevant for multi-frame images only.
ok = CompareValues(img1->loop_count, img2->loop_count,
"Loop count mismatch") && ok;
ok = CompareValues(img1->bgcolor, img2->bgcolor,
"Background color mismatch") && ok;
}
for (i = 0; i < img1->num_frames; ++i) {
// Pixel-by-pixel comparison.
const uint8_t* const rgba1 = img1->frames[i].rgba;
const uint8_t* const rgba2 = img2->frames[i].rgba;
int max_diff;
double psnr;
if (is_multi_frame_image) { // Check relevant for multi-frame images only.
const char format[] = "Frame #%d, duration mismatch";
char tmp[sizeof(format) + 8];
ok = ok && (snprintf(tmp, sizeof(tmp), format, i) >= 0);
ok = ok && CompareValues(img1->frames[i].duration,
img2->frames[i].duration, tmp);
}
GetDiffAndPSNR(rgba1, rgba2, img1->canvas_width, img1->canvas_height,
premultiply, &max_diff, &psnr);
if (min_psnr > 0.) {
if (psnr < min_psnr) {
fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i,
psnr, min_psnr);
ok = 0;
}
} else {
if (max_diff != 0) {
fprintf(stderr, "Frame #%d, max pixel diff: %d\n", i, max_diff);
ok = 0;
}
}
}
return ok;
}
static void Help(void) {
printf("Usage: anim_diff <image1> <image2> [options]\n");
printf("\nOptions:\n");
printf(" -dump_frames <folder> dump decoded frames in PAM format\n");
printf(" -min_psnr <float> ... minimum per-frame PSNR\n");
printf(" -raw_comparison ..... if this flag is not used, RGB is\n");
printf(" premultiplied before comparison\n");
}
int main(int argc, const char* argv[]) {
int return_code = -1;
int dump_frames = 0;
const char* dump_folder = NULL;
double min_psnr = 0.;
int got_input1 = 0;
int got_input2 = 0;
int premultiply = 1;
int i, c;
const char* files[2] = { NULL, NULL };
AnimatedImage images[2];
if (argc < 3) {
Help();
return -1;
}
for (c = 1; c < argc; ++c) {
int parse_error = 0;
if (!strcmp(argv[c], "-dump_frames")) {
if (c < argc - 1) {
dump_frames = 1;
dump_folder = argv[++c];
} else {
parse_error = 1;
}
} else if (!strcmp(argv[c], "-min_psnr")) {
if (c < argc - 1) {
const char* const v = argv[++c];
char* end = NULL;
const double d = strtod(v, &end);
if (end == v) {
parse_error = 1;
fprintf(stderr, "Error! '%s' is not a floating point number.\n", v);
}
min_psnr = d;
} else {
parse_error = 1;
}
} else if (!strcmp(argv[c], "-raw_comparison")) {
premultiply = 0;
} else {
if (!got_input1) {
files[0] = argv[c];
got_input1 = 1;
} else if (!got_input2) {
files[1] = argv[c];
got_input2 = 1;
} else {
parse_error = 1;
}
}
if (parse_error) {
Help();
return -1;
}
}
if (!got_input2) {
Help();
return -1;
}
if (dump_frames) {
printf("Dumping decoded frames in: %s\n", dump_folder);
}
memset(images, 0, sizeof(images));
for (i = 0; i < 2; ++i) {
printf("Decoding file: %s\n", files[i]);
if (!ReadAnimatedImage(files[i], &images[i], dump_frames, dump_folder)) {
fprintf(stderr, "Error decoding file: %s\n Aborting.\n", files[i]);
return_code = -2;
goto End;
} else {
MinimizeAnimationFrames(&images[i]);
}
}
if (!CompareAnimatedImagePair(&images[0], &images[1],
premultiply, min_psnr)) {
fprintf(stderr, "\nFiles %s and %s differ.\n", files[0], files[1]);
return_code = -3;
} else {
printf("\nFiles %s and %s are identical.\n", files[0], files[1]);
return_code = 0;
}
End:
ClearAnimatedImage(&images[0]);
ClearAnimatedImage(&images[1]);
return return_code;
}
@ -1,754 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for animated images
#include "./anim_util.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#ifdef WEBP_HAVE_GIF
#include <gif_lib.h>
#endif
#include "webp/format_constants.h"
#include "webp/decode.h"
#include "webp/demux.h"
#include "./example_util.h"
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
static const int kNumChannels = 4;
// -----------------------------------------------------------------------------
// Common utilities.
// Returns true if the frame covers the full canvas.
static int IsFullFrame(int width, int height,
int canvas_width, int canvas_height) {
return (width == canvas_width && height == canvas_height);
}
static int AllocateFrames(AnimatedImage* const image, uint32_t num_frames) {
uint32_t i;
const size_t rgba_size =
image->canvas_width * kNumChannels * image->canvas_height;
uint8_t* const mem = (uint8_t*)malloc(num_frames * rgba_size * sizeof(*mem));
DecodedFrame* const frames =
(DecodedFrame*)malloc(num_frames * sizeof(*frames));
if (mem == NULL || frames == NULL) {
free(mem);
free(frames);
return 0;
}
free(image->raw_mem);
image->num_frames = num_frames;
image->frames = frames;
for (i = 0; i < num_frames; ++i) {
frames[i].rgba = mem + i * rgba_size;
frames[i].duration = 0;
frames[i].is_key_frame = 0;
}
image->raw_mem = mem;
return 1;
}
void ClearAnimatedImage(AnimatedImage* const image) {
if (image != NULL) {
free(image->raw_mem);
free(image->frames);
image->num_frames = 0;
image->frames = NULL;
image->raw_mem = NULL;
}
}
// Clear the canvas to transparent.
static void ZeroFillCanvas(uint8_t* rgba,
uint32_t canvas_width, uint32_t canvas_height) {
memset(rgba, 0, canvas_width * kNumChannels * canvas_height);
}
// Clear given frame rectangle to transparent.
static void ZeroFillFrameRect(uint8_t* rgba, int rgba_stride, int x_offset,
int y_offset, int width, int height) {
int j;
assert(width * kNumChannels <= rgba_stride);
rgba += y_offset * rgba_stride + x_offset * kNumChannels;
for (j = 0; j < height; ++j) {
memset(rgba, 0, width * kNumChannels);
rgba += rgba_stride;
}
}
// Copy width * height pixels from 'src' to 'dst'.
static void CopyCanvas(const uint8_t* src, uint8_t* dst,
uint32_t width, uint32_t height) {
assert(src != NULL && dst != NULL);
memcpy(dst, src, width * kNumChannels * height);
}
// Copy pixels in the given rectangle from 'src' to 'dst' honoring the 'stride'.
static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
int x_offset, int y_offset,
int width, int height) {
int j;
const int width_in_bytes = width * kNumChannels;
const size_t offset = y_offset * stride + x_offset * kNumChannels;
assert(width_in_bytes <= stride);
src += offset;
dst += offset;
for (j = 0; j < height; ++j) {
memcpy(dst, src, width_in_bytes);
src += stride;
dst += stride;
}
}
// Canonicalize all transparent pixels to transparent black to aid comparison.
static void CleanupTransparentPixels(uint32_t* rgba,
uint32_t width, uint32_t height) {
const uint32_t* const rgba_end = rgba + width * height;
while (rgba < rgba_end) {
const uint8_t alpha = (*rgba >> 24) & 0xff;
if (alpha == 0) {
*rgba = 0;
}
++rgba;
}
}
// Dump frame to a PAM file. Returns true on success.
static int DumpFrame(const char filename[], const char dump_folder[],
uint32_t frame_num, const uint8_t rgba[],
int canvas_width, int canvas_height) {
int ok = 0;
size_t max_len;
int y;
const char* base_name = NULL;
char* file_name = NULL;
FILE* f = NULL;
base_name = strrchr(filename, '/');
base_name = (base_name == NULL) ? filename : base_name + 1;
max_len = strlen(dump_folder) + 1 + strlen(base_name)
+ strlen("_frame_") + strlen(".pam") + 8;
file_name = (char*)malloc(max_len * sizeof(*file_name));
if (file_name == NULL) goto End;
if (snprintf(file_name, max_len, "%s/%s_frame_%d.pam",
dump_folder, base_name, frame_num) < 0) {
fprintf(stderr, "Error while generating file name\n");
goto End;
}
f = fopen(file_name, "wb");
if (f == NULL) {
fprintf(stderr, "Error opening file for writing: %s\n", file_name);
ok = 0;
goto End;
}
if (fprintf(f, "P7\nWIDTH %d\nHEIGHT %d\n"
"DEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
canvas_width, canvas_height) < 0) {
fprintf(stderr, "Write error for file %s\n", file_name);
goto End;
}
for (y = 0; y < canvas_height; ++y) {
if (fwrite((const char*)(rgba) + y * canvas_width * kNumChannels,
canvas_width * kNumChannels, 1, f) != 1) {
fprintf(stderr, "Error writing to file: %s\n", file_name);
goto End;
}
}
ok = 1;
End:
if (f != NULL) fclose(f);
free(file_name);
return ok;
}
// -----------------------------------------------------------------------------
// WebP Decoding.
// Returns true if this is a valid WebP bitstream.
static int IsWebP(const WebPData* const webp_data) {
return (WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL) != 0);
}
// Read animated WebP bitstream 'file_str' into 'AnimatedImage' struct.
static int ReadAnimatedWebP(const char filename[],
const WebPData* const webp_data,
AnimatedImage* const image, int dump_frames,
const char dump_folder[]) {
int ok = 0;
int dump_ok = 1;
uint32_t frame_index = 0;
int prev_frame_timestamp = 0;
WebPAnimDecoder* dec;
WebPAnimInfo anim_info;
memset(image, 0, sizeof(*image));
dec = WebPAnimDecoderNew(webp_data, NULL);
if (dec == NULL) {
fprintf(stderr, "Error parsing image: %s\n", filename);
goto End;
}
if (!WebPAnimDecoderGetInfo(dec, &anim_info)) {
fprintf(stderr, "Error getting global info about the animation\n");
goto End;
}
// Animation properties.
image->canvas_width = anim_info.canvas_width;
image->canvas_height = anim_info.canvas_height;
image->loop_count = anim_info.loop_count;
image->bgcolor = anim_info.bgcolor;
// Allocate frames.
if (!AllocateFrames(image, anim_info.frame_count)) return 0;
// Decode frames.
while (WebPAnimDecoderHasMoreFrames(dec)) {
DecodedFrame* curr_frame;
uint8_t* curr_rgba;
uint8_t* frame_rgba;
int timestamp;
if (!WebPAnimDecoderGetNext(dec, &frame_rgba, &timestamp)) {
fprintf(stderr, "Error decoding frame #%u\n", frame_index);
goto End;
}
curr_frame = &image->frames[frame_index];
curr_rgba = curr_frame->rgba;
curr_frame->duration = timestamp - prev_frame_timestamp;
curr_frame->is_key_frame = 0; // Unused.
memcpy(curr_rgba, frame_rgba,
image->canvas_width * kNumChannels * image->canvas_height);
// Needed only because we may want to compare with GIF later.
CleanupTransparentPixels((uint32_t*)curr_rgba,
image->canvas_width, image->canvas_height);
if (dump_frames && dump_ok) {
dump_ok = DumpFrame(filename, dump_folder, frame_index, curr_rgba,
image->canvas_width, image->canvas_height);
if (!dump_ok) { // Print error once, but continue decode loop.
fprintf(stderr, "Error dumping frames to %s\n", dump_folder);
}
}
++frame_index;
prev_frame_timestamp = timestamp;
}
ok = dump_ok;
End:
WebPAnimDecoderDelete(dec);
return ok;
}
// -----------------------------------------------------------------------------
// GIF Decoding.
// Returns true if this is a valid GIF bitstream.
static int IsGIF(const WebPData* const data) {
return data->size > GIF_STAMP_LEN &&
(!memcmp(GIF_STAMP, data->bytes, GIF_STAMP_LEN) ||
!memcmp(GIF87_STAMP, data->bytes, GIF_STAMP_LEN) ||
!memcmp(GIF89_STAMP, data->bytes, GIF_STAMP_LEN));
}
#ifdef WEBP_HAVE_GIF
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
# define LOCAL_GIF_PREREQ(maj, min) \
(LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GIF_VERSION 0
# define LOCAL_GIF_PREREQ(maj, min) 0
#endif
#if !LOCAL_GIF_PREREQ(5, 0)
// Added in v5.0
typedef struct {
int DisposalMode;
#define DISPOSAL_UNSPECIFIED 0 // No disposal specified
#define DISPOSE_DO_NOT 1 // Leave image in place
#define DISPOSE_BACKGROUND 2 // Set area to background color
#define DISPOSE_PREVIOUS 3 // Restore to previous content
int UserInputFlag; // User confirmation required before disposal
int DelayTime; // Pre-display delay in 0.01sec units
int TransparentColor; // Palette index for transparency, -1 if none
#define NO_TRANSPARENT_COLOR -1
} GraphicsControlBlock;
static int DGifExtensionToGCB(const size_t GifExtensionLength,
const GifByteType* GifExtension,
GraphicsControlBlock* gcb) {
if (GifExtensionLength != 4) {
return GIF_ERROR;
}
gcb->DisposalMode = (GifExtension[0] >> 2) & 0x07;
gcb->UserInputFlag = (GifExtension[0] & 0x02) != 0;
gcb->DelayTime = GifExtension[1] | (GifExtension[2] << 8);
if (GifExtension[0] & 0x01) {
gcb->TransparentColor = (int)GifExtension[3];
} else {
gcb->TransparentColor = NO_TRANSPARENT_COLOR;
}
return GIF_OK;
}
static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
GraphicsControlBlock* gcb) {
int i;
if (ImageIndex < 0 || ImageIndex > GifFile->ImageCount - 1) {
return GIF_ERROR;
}
gcb->DisposalMode = DISPOSAL_UNSPECIFIED;
gcb->UserInputFlag = 0;
gcb->DelayTime = 0;
gcb->TransparentColor = NO_TRANSPARENT_COLOR;
for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
ExtensionBlock* ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
if (ep->Function == GRAPHICS_EXT_FUNC_CODE) {
return DGifExtensionToGCB(
ep->ByteCount, (const GifByteType*)ep->Bytes, gcb);
}
}
return GIF_ERROR;
}
#define CONTINUE_EXT_FUNC_CODE 0x00
// Signature was changed in v5.0
#define DGifOpenFileName(a, b) DGifOpenFileName(a)
#endif // !LOCAL_GIF_PREREQ(5, 0)
// Signature changed in v5.1
#if !LOCAL_GIF_PREREQ(5, 1)
#define DGifCloseFile(a, b) DGifCloseFile(a)
#endif
static void GIFDisplayError(const GifFileType* const gif, int gif_error) {
// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
#if LOCAL_GIF_PREREQ(4, 2)
#if LOCAL_GIF_PREREQ(5, 0)
const char* error_str =
GifErrorString((gif == NULL) ? gif_error : gif->Error);
#else
const char* error_str = GifErrorString();
(void)gif;
#endif
if (error_str == NULL) error_str = "Unknown error";
fprintf(stderr, "GIFLib Error %d: %s\n", gif_error, error_str);
#else
(void)gif;
fprintf(stderr, "GIFLib Error %d: ", gif_error);
PrintGifError();
fprintf(stderr, "\n");
#endif
}
static int IsKeyFrameGIF(const GifImageDesc* prev_desc, int prev_dispose,
const DecodedFrame* const prev_frame,
int canvas_width, int canvas_height) {
if (prev_frame == NULL) return 1;
if (prev_dispose == DISPOSE_BACKGROUND) {
if (IsFullFrame(prev_desc->Width, prev_desc->Height,
canvas_width, canvas_height)) {
return 1;
}
if (prev_frame->is_key_frame) return 1;
}
return 0;
}
static int GetTransparentIndexGIF(GifFileType* gif) {
GraphicsControlBlock first_gcb;
memset(&first_gcb, 0, sizeof(first_gcb));
DGifSavedExtensionToGCB(gif, 0, &first_gcb);
return first_gcb.TransparentColor;
}
static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
const int transparent_index = GetTransparentIndexGIF(gif);
const ColorMapObject* const color_map = gif->SColorMap;
if (transparent_index != NO_TRANSPARENT_COLOR &&
gif->SBackGroundColor == transparent_index) {
return 0x00ffffff; // Special case: transparent white.
} else if (color_map == NULL || color_map->Colors == NULL
|| gif->SBackGroundColor >= color_map->ColorCount) {
return 0xffffffff; // Invalid: assume white.
} else {
const GifColorType color = color_map->Colors[gif->SBackGroundColor];
return (0xff << 24) |
(color.Red << 16) |
(color.Green << 8) |
(color.Blue << 0);
}
}
// Find appropriate app extension and get loop count from the next extension.
static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
int i;
for (i = 0; i < gif->ImageCount; ++i) {
const SavedImage* const image = &gif->SavedImages[i];
int j;
for (j = 0; (j + 1) < image->ExtensionBlockCount; ++j) {
const ExtensionBlock* const eb1 = image->ExtensionBlocks + j;
const ExtensionBlock* const eb2 = image->ExtensionBlocks + j + 1;
const char* const signature = (const char*)eb1->Bytes;
const int signature_is_ok =
(eb1->Function == APPLICATION_EXT_FUNC_CODE) &&
(eb1->ByteCount == 11) &&
(!memcmp(signature, "NETSCAPE2.0", 11) ||
!memcmp(signature, "ANIMEXTS1.0", 11));
if (signature_is_ok &&
eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
eb2->Bytes[0] == 1) {
return ((uint32_t)(eb2->Bytes[2]) << 8) +
((uint32_t)(eb2->Bytes[1]) << 0);
}
}
}
return 0; // Default.
}
// Get duration of 'n'th frame in milliseconds.
static int GetFrameDurationGIF(GifFileType* gif, int n) {
GraphicsControlBlock gcb;
memset(&gcb, 0, sizeof(gcb));
DGifSavedExtensionToGCB(gif, n, &gcb);
return gcb.DelayTime * 10;
}
// Returns true if frame 'target' completely covers 'covered'.
static int CoversFrameGIF(const GifImageDesc* const target,
const GifImageDesc* const covered) {
return target->Left <= covered->Left &&
covered->Left + covered->Width <= target->Left + target->Width &&
target->Top <= covered->Top &&
covered->Top + covered->Height <= target->Top + target->Height;
}
static void RemapPixelsGIF(const uint8_t* const src,
const ColorMapObject* const cmap,
int transparent_color, int len, uint8_t* dst) {
int i;
for (i = 0; i < len; ++i) {
if (src[i] != transparent_color) {
// If a pixel in the current frame is transparent, we don't modify it, so
// that we can see-through the corresponding pixel from an earlier frame.
const GifColorType c = cmap->Colors[src[i]];
dst[4 * i + 0] = c.Red;
dst[4 * i + 1] = c.Green;
dst[4 * i + 2] = c.Blue;
dst[4 * i + 3] = 0xff;
}
}
}
static int ReadFrameGIF(const SavedImage* const gif_image,
const ColorMapObject* cmap, int transparent_color,
int out_stride, uint8_t* const dst) {
const GifImageDesc* image_desc = &gif_image->ImageDesc;
const uint8_t* in;
uint8_t* out;
int j;
if (image_desc->ColorMap) cmap = image_desc->ColorMap;
if (cmap == NULL || cmap->ColorCount != (1 << cmap->BitsPerPixel)) {
fprintf(stderr, "Potentially corrupt color map.\n");
return 0;
}
in = (const uint8_t*)gif_image->RasterBits;
out = dst + image_desc->Top * out_stride + image_desc->Left * kNumChannels;
for (j = 0; j < image_desc->Height; ++j) {
RemapPixelsGIF(in, cmap, transparent_color, image_desc->Width, out);
in += image_desc->Width;
out += out_stride;
}
return 1;
}
// Read animated GIF bitstream from 'filename' into 'AnimatedImage' struct.
static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
int dump_frames, const char dump_folder[]) {
uint32_t frame_count;
uint32_t canvas_width, canvas_height;
uint32_t i;
int gif_error;
GifFileType* gif;
gif = DGifOpenFileName(filename, NULL);
if (gif == NULL) {
fprintf(stderr, "Could not read file: %s.\n", filename);
return 0;
}
gif_error = DGifSlurp(gif);
if (gif_error != GIF_OK) {
fprintf(stderr, "Could not parse image: %s.\n", filename);
GIFDisplayError(gif, gif_error);
DGifCloseFile(gif, NULL);
return 0;
}
// Animation properties.
image->canvas_width = (uint32_t)gif->SWidth;
image->canvas_height = (uint32_t)gif->SHeight;
if (image->canvas_width > MAX_CANVAS_SIZE ||
image->canvas_height > MAX_CANVAS_SIZE) {
fprintf(stderr, "Invalid canvas dimension: %d x %d\n",
image->canvas_width, image->canvas_height);
DGifCloseFile(gif, NULL);
return 0;
}
image->loop_count = GetLoopCountGIF(gif);
image->bgcolor = GetBackgroundColorGIF(gif);
frame_count = (uint32_t)gif->ImageCount;
if (frame_count == 0) {
DGifCloseFile(gif, NULL);
return 0;
}
if (image->canvas_width == 0 || image->canvas_height == 0) {
image->canvas_width = gif->SavedImages[0].ImageDesc.Width;
image->canvas_height = gif->SavedImages[0].ImageDesc.Height;
gif->SavedImages[0].ImageDesc.Left = 0;
gif->SavedImages[0].ImageDesc.Top = 0;
if (image->canvas_width == 0 || image->canvas_height == 0) {
fprintf(stderr, "Invalid canvas size in GIF.\n");
DGifCloseFile(gif, NULL);
return 0;
}
}
// Allocate frames.
AllocateFrames(image, frame_count);
canvas_width = image->canvas_width;
canvas_height = image->canvas_height;
// Decode and reconstruct frames.
for (i = 0; i < frame_count; ++i) {
const int canvas_width_in_bytes = canvas_width * kNumChannels;
const SavedImage* const curr_gif_image = &gif->SavedImages[i];
GraphicsControlBlock curr_gcb;
DecodedFrame* curr_frame;
uint8_t* curr_rgba;
memset(&curr_gcb, 0, sizeof(curr_gcb));
DGifSavedExtensionToGCB(gif, i, &curr_gcb);
curr_frame = &image->frames[i];
curr_rgba = curr_frame->rgba;
curr_frame->duration = GetFrameDurationGIF(gif, i);
if (i == 0) { // Initialize as transparent.
curr_frame->is_key_frame = 1;
ZeroFillCanvas(curr_rgba, canvas_width, canvas_height);
} else {
DecodedFrame* const prev_frame = &image->frames[i - 1];
const GifImageDesc* const prev_desc = &gif->SavedImages[i - 1].ImageDesc;
GraphicsControlBlock prev_gcb;
memset(&prev_gcb, 0, sizeof(prev_gcb));
DGifSavedExtensionToGCB(gif, i - 1, &prev_gcb);
curr_frame->is_key_frame =
IsKeyFrameGIF(prev_desc, prev_gcb.DisposalMode, prev_frame,
canvas_width, canvas_height);
if (curr_frame->is_key_frame) { // Initialize as transparent.
ZeroFillCanvas(curr_rgba, canvas_width, canvas_height);
} else {
int prev_frame_disposed, curr_frame_opaque;
int prev_frame_completely_covered;
// Initialize with previous canvas.
uint8_t* const prev_rgba = image->frames[i - 1].rgba;
CopyCanvas(prev_rgba, curr_rgba, canvas_width, canvas_height);
// Dispose previous frame rectangle.
prev_frame_disposed =
(prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
curr_frame_opaque =
(curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
prev_frame_completely_covered =
curr_frame_opaque &&
CoversFrameGIF(&curr_gif_image->ImageDesc, prev_desc);
if (prev_frame_disposed && !prev_frame_completely_covered) {
switch (prev_gcb.DisposalMode) {
case DISPOSE_BACKGROUND: {
ZeroFillFrameRect(curr_rgba, canvas_width_in_bytes,
prev_desc->Left, prev_desc->Top,
prev_desc->Width, prev_desc->Height);
break;
}
case DISPOSE_PREVIOUS: {
int src_frame_num = i - 2;
while (src_frame_num >= 0) {
GraphicsControlBlock src_frame_gcb;
memset(&src_frame_gcb, 0, sizeof(src_frame_gcb));
DGifSavedExtensionToGCB(gif, src_frame_num, &src_frame_gcb);
if (src_frame_gcb.DisposalMode != DISPOSE_PREVIOUS) break;
--src_frame_num;
}
if (src_frame_num >= 0) {
// Restore pixels inside previous frame rectangle to
// corresponding pixels in source canvas.
uint8_t* const src_frame_rgba =
image->frames[src_frame_num].rgba;
CopyFrameRectangle(src_frame_rgba, curr_rgba,
canvas_width_in_bytes,
prev_desc->Left, prev_desc->Top,
prev_desc->Width, prev_desc->Height);
} else {
// Source canvas doesn't exist. So clear previous frame
// rectangle to background.
ZeroFillFrameRect(curr_rgba, canvas_width_in_bytes,
prev_desc->Left, prev_desc->Top,
prev_desc->Width, prev_desc->Height);
}
break;
}
default:
break; // Nothing to do.
}
}
}
}
// Decode current frame.
if (!ReadFrameGIF(curr_gif_image, gif->SColorMap, curr_gcb.TransparentColor,
canvas_width_in_bytes, curr_rgba)) {
DGifCloseFile(gif, NULL);
return 0;
}
if (dump_frames) {
if (!DumpFrame(filename, dump_folder, i, curr_rgba,
canvas_width, canvas_height)) {
DGifCloseFile(gif, NULL);
return 0;
}
}
}
DGifCloseFile(gif, NULL);
return 1;
}
#else
static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
int dump_frames, const char dump_folder[]) {
(void)filename;
(void)image;
(void)dump_frames;
(void)dump_folder;
fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
"package before building.\n");
return 0;
}
#endif // WEBP_HAVE_GIF
// -----------------------------------------------------------------------------
int ReadAnimatedImage(const char filename[], AnimatedImage* const image,
int dump_frames, const char dump_folder[]) {
int ok = 0;
WebPData webp_data;
WebPDataInit(&webp_data);
memset(image, 0, sizeof(*image));
if (!ExUtilReadFile(filename, &webp_data.bytes, &webp_data.size)) {
fprintf(stderr, "Error reading file: %s\n", filename);
return 0;
}
if (IsWebP(&webp_data)) {
ok = ReadAnimatedWebP(filename, &webp_data, image, dump_frames,
dump_folder);
} else if (IsGIF(&webp_data)) {
ok = ReadAnimatedGIF(filename, image, dump_frames, dump_folder);
} else {
fprintf(stderr,
"Unknown file type: %s. Supported file types are WebP and GIF\n",
filename);
ok = 0;
}
if (!ok) ClearAnimatedImage(image);
WebPDataClear(&webp_data);
return ok;
}
static void Accumulate(double v1, double v2, double* const max_diff,
double* const sse) {
const double diff = fabs(v1 - v2);
if (diff > *max_diff) *max_diff = diff;
*sse += diff * diff;
}
void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
uint32_t width, uint32_t height, int premultiply,
int* const max_diff, double* const psnr) {
const uint32_t stride = width * kNumChannels;
const int kAlphaChannel = kNumChannels - 1;
double f_max_diff = 0.;
double sse = 0.;
uint32_t x, y;
for (y = 0; y < height; ++y) {
for (x = 0; x < stride; x += kNumChannels) {
int k;
const size_t offset = y * stride + x;
const int alpha1 = rgba1[offset + kAlphaChannel];
const int alpha2 = rgba2[offset + kAlphaChannel];
Accumulate(alpha1, alpha2, &f_max_diff, &sse);
if (!premultiply) {
for (k = 0; k < kAlphaChannel; ++k) {
Accumulate(rgba1[offset + k], rgba2[offset + k], &f_max_diff, &sse);
}
} else {
// premultiply R/G/B channels with alpha value
for (k = 0; k < kAlphaChannel; ++k) {
Accumulate(rgba1[offset + k] * alpha1 / 255.,
rgba2[offset + k] * alpha2 / 255.,
&f_max_diff, &sse);
}
}
}
}
*max_diff = (int)f_max_diff;
if (*max_diff == 0) {
*psnr = 99.; // PSNR when images are identical.
} else {
sse /= stride * height;
*psnr = 4.3429448 * log(255. * 255. / sse);
}
}
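A note on the PSNR computation above: once 'sse' has been divided by the number
of samples it holds the mean squared error, and the constant 4.3429448 is
10 / ln(10), so the expression is the usual 10 * log10(255^2 / MSE). A minimal
standalone check (the helper name 'PlainPSNR' is hypothetical, not part of
these sources):

// Reference form: PSNR = 10 * log10(MAX^2 / MSE), with MAX = 255.
// Since 4.3429448 ~= 10 / ln(10), 4.3429448 * log(x) == 10 * log10(x).
#include <math.h>
#include <stdio.h>

static double PlainPSNR(double mse) {   // hypothetical helper
  return 10. * log10(255. * 255. / mse);
}

int main(void) {
  const double mse = 2.5;   // arbitrary example value
  printf("%f vs %f\n", PlainPSNR(mse), 4.3429448 * log(255. * 255. / mse));
  return 0;   // the two printed values agree to ~7 significant digits
}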

View File

@ -1,63 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for animated images
#ifndef WEBP_EXAMPLES_ANIM_UTIL_H_
#define WEBP_EXAMPLES_ANIM_UTIL_H_
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#endif
#include "webp/types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
uint8_t* rgba; // Decoded and reconstructed full frame.
int duration; // Frame duration in milliseconds.
int is_key_frame; // True if this frame is a key-frame.
} DecodedFrame;
typedef struct {
uint32_t canvas_width;
uint32_t canvas_height;
uint32_t bgcolor;
uint32_t loop_count;
DecodedFrame* frames;
uint32_t num_frames;
void* raw_mem;
} AnimatedImage;
// Deallocate everything in 'image' (but not the object itself).
void ClearAnimatedImage(AnimatedImage* const image);
// Read animated image file into 'AnimatedImage' struct.
// If 'dump_frames' is true, dump frames to 'dump_folder'.
// Previous content of 'image' is obliterated.
// Upon successful return, content of 'image' must be deleted by
// calling 'ClearAnimatedImage'.
int ReadAnimatedImage(const char filename[], AnimatedImage* const image,
int dump_frames, const char dump_folder[]);
// Given two RGBA buffers, calculate max pixel difference and PSNR.
// If 'premultiply' is true, R/G/B values will be pre-multiplied by the
// transparency before comparison.
void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
uint32_t width, uint32_t height, int premultiply,
int* const max_diff, double* const psnr);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_ANIM_UTIL_H_
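For orientation, a minimal caller of the interface declared above might look
like the sketch below (error handling trimmed; it assumes the 4-byte-per-pixel
RGBA layout that anim_util.c uses for the 'rgba' buffers, and passes a NULL
dump folder since frame dumping is disabled):

#include <stdio.h>
#include "./anim_util.h"   // assumed include path, next to the other examples

int main(int argc, const char* argv[]) {
  AnimatedImage img;
  if (argc < 2) return 1;
  if (!ReadAnimatedImage(argv[1], &img, 0 /*dump_frames*/, NULL)) {
    fprintf(stderr, "Could not decode %s\n", argv[1]);
    return 1;
  }
  printf("%u frames, canvas %u x %u\n",
         img.num_frames, img.canvas_width, img.canvas_height);
  if (img.num_frames >= 2) {   // compare the first two reconstructed frames
    int max_diff;
    double psnr;
    GetDiffAndPSNR(img.frames[0].rgba, img.frames[1].rgba,
                   img.canvas_width, img.canvas_height,
                   0 /*premultiply*/, &max_diff, &psnr);
    printf("frame 0 vs frame 1: max_diff=%d psnr=%.2f dB\n", max_diff, psnr);
  }
  ClearAnimatedImage(&img);
  return 0;
}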

View File

@ -17,29 +17,27 @@
#include <string.h>
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
#include "./stopwatch.h"
#include "./jpegdec.h"
#include "./pngdec.h"
#include "./tiffdec.h"
#include "./webpdec.h"
#include "./wicdec.h"
#ifndef WEBP_DLL
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
extern void* VP8GetCPUInfo; // opaque forward declaration.
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif // WEBP_DLL
@ -48,62 +46,57 @@ extern void* VP8GetCPUInfo; // opaque forward declaration.
static int verbose = 0;
static int ReadYUV(const uint8_t* const data, size_t data_size,
WebPPicture* const pic) {
int y;
static int ReadYUV(FILE* in_file, WebPPicture* const pic) {
const int use_argb = pic->use_argb;
const int uv_width = (pic->width + 1) / 2;
const int uv_height = (pic->height + 1) / 2;
const int uv_plane_size = uv_width * uv_height;
const size_t expected_data_size =
pic->width * pic->height + 2 * uv_plane_size;
if (data_size != expected_data_size) {
fprintf(stderr,
"input data doesn't have the expected size (%d instead of %d)\n",
(int)data_size, (int)expected_data_size);
return 0;
}
int y;
int ok = 0;
pic->use_argb = 0;
if (!WebPPictureAlloc(pic)) return 0;
if (!WebPPictureAlloc(pic)) return ok;
for (y = 0; y < pic->height; ++y) {
memcpy(pic->y + y * pic->y_stride, data + y * pic->width,
pic->width * sizeof(*pic->y));
if (fread(pic->y + y * pic->y_stride, pic->width, 1, in_file) != 1) {
goto End;
}
}
for (y = 0; y < uv_height; ++y) {
const uint8_t* const uv_data = data + pic->height * pic->y_stride;
memcpy(pic->u + y * pic->uv_stride, uv_data + y * uv_width,
uv_width * sizeof(*uv_data));
memcpy(pic->v + y * pic->uv_stride, uv_data + y * uv_width + uv_plane_size,
uv_width * sizeof(*uv_data));
if (fread(pic->u + y * pic->uv_stride, uv_width, 1, in_file) != 1)
goto End;
}
return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
for (y = 0; y < uv_height; ++y) {
if (fread(pic->v + y * pic->uv_stride, uv_width, 1, in_file) != 1)
goto End;
}
ok = 1;
if (use_argb) ok = WebPPictureYUVAToARGB(pic);
End:
return ok;
}
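The buffer-based variant of ReadYUV() shown above checks the input size against
the usual 4:2:0 layout: a full-resolution Y plane followed by half-resolution U
and V planes, i.e. width * height + 2 * ((width + 1) / 2) * ((height + 1) / 2)
bytes. A small sketch of that arithmetic (hypothetical helper, not part of
cwebp):

#include <stdio.h>

// Expected raw YUV 4:2:0 size, mirroring the check in ReadYUV() above.
static size_t ExpectedYUVSize(int width, int height) {
  const int uv_width = (width + 1) / 2;
  const int uv_height = (height + 1) / 2;
  return (size_t)width * height + 2 * (size_t)uv_width * uv_height;
}

int main(void) {
  printf("%u\n", (unsigned)ExpectedYUVSize(5, 3));        // 15 + 2*3*2 = 27
  printf("%u\n", (unsigned)ExpectedYUVSize(1920, 1080));   // 3110400 bytes
  return 0;
}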
#ifdef HAVE_WINCODEC_H
static int ReadPicture(const char* const filename, WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
int ok = 0;
const uint8_t* data = NULL;
size_t data_size = 0;
int ok;
if (pic->width != 0 && pic->height != 0) {
ok = ExUtilReadFile(filename, &data, &data_size);
ok = ok && ReadYUV(data, data_size, pic);
// If image size is specified, infer it as YUV format.
FILE* in_file = fopen(filename, "rb");
if (in_file == NULL) {
fprintf(stderr, "Error! Cannot open input file '%s'\n", filename);
return 0;
}
ok = ReadYUV(in_file, pic);
fclose(in_file);
} else {
// If no size specified, try to decode it using WIC.
ok = ReadPictureWithWIC(filename, pic, keep_alpha, metadata);
if (!ok) {
ok = ExUtilReadFile(filename, &data, &data_size);
ok = ok && ReadWebP(data, data_size, pic, keep_alpha, metadata);
}
}
if (!ok) {
fprintf(stderr, "Error! Could not process file %s\n", filename);
}
free((void*)data);
return ok;
}
@ -113,62 +106,58 @@ typedef enum {
PNG_ = 0,
JPEG_,
TIFF_, // 'TIFF' clashes with libtiff
WEBP_,
UNSUPPORTED
} InputFileFormat;
static uint32_t GetBE32(const uint8_t buf[]) {
return ((uint32_t)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
}
static InputFileFormat GuessImageType(const uint8_t buf[12]) {
static InputFileFormat GetImageType(FILE* in_file) {
InputFileFormat format = UNSUPPORTED;
const uint32_t magic1 = GetBE32(buf + 0);
const uint32_t magic2 = GetBE32(buf + 8);
if (magic1 == 0x89504E47U) {
uint32_t magic;
uint8_t buf[4];
if ((fread(&buf[0], 4, 1, in_file) != 1) ||
(fseek(in_file, 0, SEEK_SET) != 0)) {
return format;
}
magic = ((uint32_t)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
if (magic == 0x89504E47U) {
format = PNG_;
} else if (magic1 >= 0xFFD8FF00U && magic1 <= 0xFFD8FFFFU) {
} else if (magic >= 0xFFD8FF00U && magic <= 0xFFD8FFFFU) {
format = JPEG_;
} else if (magic1 == 0x49492A00 || magic1 == 0x4D4D002A) {
} else if (magic == 0x49492A00 || magic == 0x4D4D002A) {
format = TIFF_;
} else if (magic1 == 0x52494646 && magic2 == 0x57454250) {
format = WEBP_;
}
return format;
}
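The magic numbers tested in the two variants above are just the leading bytes
of each container: 0x89504E47 is "\x89PNG", 0xFFD8FFxx is the JPEG SOI marker,
0x49492A00 / 0x4D4D002A are little- and big-endian TIFF, and a WebP file starts
with "RIFF" plus "WEBP" at offset 8. A condensed sketch of the same sniffing on
an in-memory buffer (the name SniffFormat is hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint32_t BE32(const uint8_t* p) {
  return ((uint32_t)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}

// Returns a short format name, or NULL if the 12-byte prefix is unknown.
static const char* SniffFormat(const uint8_t buf[12]) {   // hypothetical
  const uint32_t m1 = BE32(buf + 0);
  const uint32_t m2 = BE32(buf + 8);
  if (m1 == 0x89504E47U) return "PNG";
  if (m1 >= 0xFFD8FF00U && m1 <= 0xFFD8FFFFU) return "JPEG";
  if (m1 == 0x49492A00U || m1 == 0x4D4D002AU) return "TIFF";
  if (m1 == 0x52494646U && m2 == 0x57454250U) return "WebP";
  return NULL;
}

int main(void) {
  const uint8_t png_sig[12] = { 0x89, 'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a,
                                0, 0, 0, 0x0d };
  printf("%s\n", SniffFormat(png_sig));   // prints "PNG"
  return 0;
}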
static int ReadPicture(const char* const filename, WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
const uint8_t* data = NULL;
size_t data_size = 0;
int ok = 0;
ok = ExUtilReadFile(filename, &data, &data_size);
if (!ok) goto End;
FILE* in_file = fopen(filename, "rb");
if (in_file == NULL) {
fprintf(stderr, "Error! Cannot open input file '%s'\n", filename);
return ok;
}
if (pic->width == 0 || pic->height == 0) {
ok = 0;
if (data_size >= 12) {
const InputFileFormat format = GuessImageType(data);
if (format == PNG_) {
ok = ReadPNG(data, data_size, pic, keep_alpha, metadata);
} else if (format == JPEG_) {
ok = ReadJPEG(data, data_size, pic, metadata);
} else if (format == TIFF_) {
ok = ReadTIFF(data, data_size, pic, keep_alpha, metadata);
} else if (format == WEBP_) {
ok = ReadWebP(data, data_size, pic, keep_alpha, metadata);
}
// If no size specified, try to decode it as PNG/JPEG (as appropriate).
const InputFileFormat format = GetImageType(in_file);
if (format == PNG_) {
ok = ReadPNG(in_file, pic, keep_alpha, metadata);
} else if (format == JPEG_) {
ok = ReadJPEG(in_file, pic, metadata);
} else if (format == TIFF_) {
ok = ReadTIFF(filename, pic, keep_alpha, metadata);
}
} else {
// If image size is specified, infer it as YUV format.
ok = ReadYUV(data, data_size, pic);
ok = ReadYUV(in_file, pic);
}
End:
if (!ok) {
fprintf(stderr, "Error! Could not process file %s\n", filename);
}
free((void*)data);
fclose(in_file);
return ok;
}
@ -212,8 +201,6 @@ static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
const char* const description) {
fprintf(stderr, "Lossless-%s compressed size: %d bytes\n",
description, stats->lossless_size);
fprintf(stderr, " * Header size: %d bytes, image data size: %d\n",
stats->lossless_hdr_size, stats->lossless_data_size);
if (stats->lossless_features) {
fprintf(stderr, " * Lossless features used:");
if (stats->lossless_features & 1) fprintf(stderr, " PREDICTION");
@ -279,6 +266,10 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
fprintf(stderr, " transparency: %6d (%.1f dB)\n",
stats->alpha_data_size, stats->PSNR[4]);
}
if (stats->layer_data_size) {
fprintf(stderr, " enhancement: %6d\n",
stats->layer_data_size);
}
fprintf(stderr, " Residuals bytes "
"|segment 1|segment 2|segment 3"
"|segment 4| total\n");
@ -307,9 +298,6 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
PrintFullLosslessInfo(stats, "alpha");
}
}
}
static void PrintMapInfo(const WebPPicture* const pic) {
if (pic->extra_info != NULL) {
const int mb_w = (pic->width + 15) / 16;
const int mb_h = (pic->height + 15) / 16;
@ -319,18 +307,18 @@ static void PrintMapInfo(const WebPPicture* const pic) {
for (x = 0; x < mb_w; ++x) {
const int c = pic->extra_info[x + y * mb_w];
if (type == 1) { // intra4/intra16
fprintf(stderr, "%c", "+."[c]);
printf("%c", "+."[c]);
} else if (type == 2) { // segments
fprintf(stderr, "%c", ".-*X"[c]);
printf("%c", ".-*X"[c]);
} else if (type == 3) { // quantizers
fprintf(stderr, "%.2d ", c);
printf("%.2d ", c);
} else if (type == 6 || type == 7) {
fprintf(stderr, "%3d ", c);
printf("%3d ", c);
} else {
fprintf(stderr, "0x%.2x ", c);
printf("0x%.2x ", c);
}
}
fprintf(stderr, "\n");
printf("\n");
}
}
}
@ -506,14 +494,11 @@ static int WriteWebPWithMetadata(FILE* const out,
if (has_vp8x) { // update the existing VP8X flags
webp[kChunkHeaderSize] |= (uint8_t)(flags & 0xff);
ok = ok && (fwrite(webp, kVP8XChunkSize, 1, out) == 1);
webp += kVP8XChunkSize;
webp_size -= kVP8XChunkSize;
} else {
const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
if (is_lossless) {
// Presence of alpha is stored in the 29th bit of VP8L data.
if (webp[kChunkHeaderSize + 3] & (1 << 5)) flags |= kAlphaFlag;
}
// The alpha flag is forced with lossless images.
if (is_lossless) flags |= kAlphaFlag;
ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
ok = ok && WriteLE32(out, flags);
ok = ok && WriteLE24(out, picture->width - 1);
@ -543,8 +528,9 @@ static int WriteWebPWithMetadata(FILE* const out,
//------------------------------------------------------------------------------
static int ProgressReport(int percent, const WebPPicture* const picture) {
fprintf(stderr, "[%s]: %3d %% \r",
(char*)picture->user_data, percent);
printf("[%s]: %3d %% \r",
(char*)picture->user_data, percent);
fflush(stdout);
return 1; // all ok
}
@ -561,37 +547,35 @@ static void HelpShort(void) {
static void HelpLong(void) {
printf("Usage:\n");
printf(" cwebp [-preset <...>] [options] in_file [-o out_file]\n\n");
printf("If input size (-s) for an image is not specified, it is\n"
"assumed to be a PNG, JPEG, TIFF or WebP file.\n");
printf("If input size (-s) for an image is not specified, "
"it is assumed to be a PNG, JPEG or TIFF file.\n");
#ifdef HAVE_WINCODEC_H
printf("Windows builds can take as input any of the files handled by WIC.\n");
printf("Windows builds can take as input any of the files handled by WIC\n");
#endif
printf("\nOptions:\n");
printf("options:\n");
printf(" -h / -help ............ short help\n");
printf(" -H / -longhelp ........ long help\n");
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
printf(" -alpha_q <int> ......... transparency-compression quality "
"(0..100)\n");
printf(" -preset <string> ....... preset setting, one of:\n");
printf(" -alpha_q <int> ......... Transparency-compression quality "
"(0..100).\n");
printf(" -preset <string> ....... Preset setting, one of:\n");
printf(" default, photo, picture,\n");
printf(" drawing, icon, text\n");
printf(" -preset must come first, as it overwrites other parameters\n");
printf(" -z <int> ............... activates lossless preset with given\n"
" level in [0:fast, ..., 9:slowest]\n");
printf(" -preset must come first, as it overwrites other parameters.");
printf("\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest)\n");
printf(" -segments <int> ........ number of segments to use (1..4)\n");
printf(" -size <int> ............ target size (in bytes)\n");
printf(" -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
printf(" -size <int> ............ Target size (in bytes)\n");
printf(" -psnr <float> .......... Target PSNR (in dB. typically: 42)\n");
printf("\n");
printf(" -s <int> <int> ......... input size (width x height) for YUV\n");
printf(" -sns <int> ............. spatial noise shaping (0:off, 100:max)\n");
printf(" -s <int> <int> ......... Input size (width x height) for YUV\n");
printf(" -sns <int> ............. Spatial Noise Shaping (0:off, 100:max)\n");
printf(" -f <int> ............... filter strength (0=off..100)\n");
printf(" -sharpness <int> ....... "
"filter sharpness (0:most .. 7:least sharp)\n");
printf(" -strong ................ use strong filter instead "
"of simple (default)\n");
printf(" -nostrong .............. use simple filter instead of strong\n");
"of simple (default).\n");
printf(" -nostrong .............. use simple filter instead of strong.\n");
printf(" -partition_limit <int> . limit quality to fit the 512k limit on\n");
printf(" "
"the first partition (0=no degradation ... 100=full)\n");
@ -600,29 +584,22 @@ static void HelpLong(void) {
printf(" -resize <w> <h> ........ resize picture (after any cropping)\n");
printf(" -mt .................... use multi-threading if available\n");
printf(" -low_memory ............ reduce memory usage (slower encoding)\n");
printf(" -map <int> ............. print map of extra info\n");
printf(" -print_psnr ............ prints averaged PSNR distortion\n");
printf(" -print_ssim ............ prints averaged SSIM distortion\n");
printf(" -print_lsim ............ prints local-similarity distortion\n");
printf(" -d <file.pgm> .......... dump the compressed output (PGM file)\n");
printf(" -alpha_method <int> .... transparency-compression method (0..1)\n");
printf(" -alpha_filter <string> . predictive filtering for alpha plane,\n");
printf(" one of: none, fast (default) or best\n");
printf(" -exact ................. preserve RGB values in transparent area"
"\n");
printf(" -blend_alpha <hex> ..... blend colors against background color\n"
" expressed as RGB values written in\n"
" hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
" green=0xe0 and blue=0xd0\n");
printf(" -noalpha ............... discard any transparency information\n");
printf(" -lossless .............. encode image losslessly\n");
printf(" -near_lossless <int> ... use near-lossless image\n"
" preprocessing (0..100=off)\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" -delta_palettization ... use delta palettization\n");
#endif // WEBP_EXPERIMENTAL_FEATURES
printf(" -hint <string> ......... specify image characteristics hint,\n");
printf(" one of: photo, picture or graph\n");
printf(" -444 / -422 / -gray ..... Change colorspace\n");
#endif
printf(" -map <int> ............. print map of extra info.\n");
printf(" -print_psnr ............ prints averaged PSNR distortion.\n");
printf(" -print_ssim ............ prints averaged SSIM distortion.\n");
printf(" -print_lsim ............ prints local-similarity distortion.\n");
printf(" -d <file.pgm> .......... dump the compressed output (PGM file).\n");
printf(" -alpha_method <int> .... Transparency-compression method (0..1)\n");
printf(" -alpha_filter <string> . predictive filtering for alpha plane.\n");
printf(" One of: none, fast (default) or best.\n");
printf(" -alpha_cleanup ......... Clean RGB values in transparent area.\n");
printf(" -noalpha ............... discard any transparency information.\n");
printf(" -lossless .............. Encode image losslessly.\n");
printf(" -hint <string> ......... Specify image characteristics hint.\n");
printf(" One of: photo, picture or graph\n");
printf("\n");
printf(" -metadata <string> ..... comma separated list of metadata to\n");
@ -633,18 +610,18 @@ static void HelpLong(void) {
printf("\n");
printf(" -short ................. condense printed message\n");
printf(" -quiet ................. don't print anything\n");
printf(" -version ............... print version number and exit\n");
printf(" -quiet ................. don't print anything.\n");
printf(" -version ............... print version number and exit.\n");
#ifndef WEBP_DLL
printf(" -noasm ................. disable all assembly optimizations\n");
printf(" -noasm ................. disable all assembly optimizations.\n");
#endif
printf(" -v ..................... verbose, e.g. print encoding/decoding "
"times\n");
printf(" -progress .............. report encoding progress\n");
printf("\n");
printf("Experimental Options:\n");
printf(" -jpeg_like ............. roughly match expected JPEG size\n");
printf(" -af .................... auto-adjust filter strength\n");
printf(" -jpeg_like ............. Roughly match expected JPEG size.\n");
printf(" -af .................... auto-adjust filter strength.\n");
printf(" -pre <int> ............. pre-processing filter\n");
printf("\n");
}
@ -652,7 +629,7 @@ static void HelpLong(void) {
//------------------------------------------------------------------------------
// Error messages
static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {
static const char* const kErrorMessages[] = {
"OK",
"OUT_OF_MEMORY: Out of memory allocating objects",
"BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
@ -681,12 +658,8 @@ int main(int argc, const char *argv[]) {
int short_output = 0;
int quiet = 0;
int keep_alpha = 1;
int blend_alpha = 0;
uint32_t background_color = 0xffffffu;
int crop = 0, crop_x = 0, crop_y = 0, crop_w = 0, crop_h = 0;
int resize_w = 0, resize_h = 0;
int lossless_preset = 6;
int use_lossless_preset = -1; // -1=unset, 0=don't use, 1=use it
int show_progress = 0;
int keep_metadata = 0;
int metadata_written = 0;
@ -714,7 +687,6 @@ int main(int argc, const char *argv[]) {
}
for (c = 1; c < argc; ++c) {
int parse_error = 0;
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
HelpShort();
return 0;
@ -736,40 +708,20 @@ int main(int argc, const char *argv[]) {
config.show_compressed = 1;
print_distortion = 2;
} else if (!strcmp(argv[c], "-short")) {
++short_output;
short_output++;
} else if (!strcmp(argv[c], "-s") && c < argc - 2) {
picture.width = ExUtilGetInt(argv[++c], 0, &parse_error);
picture.height = ExUtilGetInt(argv[++c], 0, &parse_error);
if (picture.width > WEBP_MAX_DIMENSION || picture.width < 0 ||
picture.height > WEBP_MAX_DIMENSION || picture.height < 0) {
fprintf(stderr,
"Specified dimension (%d x %d) is out of range.\n",
picture.width, picture.height);
goto Error;
}
picture.width = strtol(argv[++c], NULL, 0);
picture.height = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-m") && c < argc - 1) {
config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
use_lossless_preset = 0; // disable -z option
config.method = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
use_lossless_preset = 0; // disable -z option
} else if (!strcmp(argv[c], "-z") && c < argc - 1) {
lossless_preset = ExUtilGetInt(argv[++c], 0, &parse_error);
if (use_lossless_preset != 0) use_lossless_preset = 1;
config.quality = (float)strtod(argv[++c], NULL);
} else if (!strcmp(argv[c], "-alpha_q") && c < argc - 1) {
config.alpha_quality = ExUtilGetInt(argv[++c], 0, &parse_error);
config.alpha_quality = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-alpha_method") && c < argc - 1) {
config.alpha_compression = ExUtilGetInt(argv[++c], 0, &parse_error);
config.alpha_compression = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-alpha_cleanup")) {
// This flag is obsolete, does opposite of -exact.
config.exact = 0;
} else if (!strcmp(argv[c], "-exact")) {
config.exact = 1;
} else if (!strcmp(argv[c], "-blend_alpha") && c < argc - 1) {
blend_alpha = 1;
// background color is given in hex with an optional '0x' prefix
background_color = ExUtilGetInt(argv[++c], 16, &parse_error);
background_color = background_color & 0x00ffffffu;
keep_alpha = keep_alpha ? 2 : 0;
} else if (!strcmp(argv[c], "-alpha_filter") && c < argc - 1) {
++c;
if (!strcmp(argv[c], "none")) {
@ -786,14 +738,7 @@ int main(int argc, const char *argv[]) {
keep_alpha = 0;
} else if (!strcmp(argv[c], "-lossless")) {
config.lossless = 1;
} else if (!strcmp(argv[c], "-near_lossless") && c < argc - 1) {
config.near_lossless = ExUtilGetInt(argv[++c], 0, &parse_error);
config.lossless = 1; // use near-lossless only with lossless
#ifdef WEBP_EXPERIMENTAL_FEATURES
} else if (!strcmp(argv[c], "-delta_palettization")) {
config.delta_palettization = 1;
config.lossless = 1; // use delta-palettization only with lossless
#endif // WEBP_EXPERIMENTAL_FEATURES
picture.use_argb = 1;
} else if (!strcmp(argv[c], "-hint") && c < argc - 1) {
++c;
if (!strcmp(argv[c], "photo")) {
@ -807,13 +752,13 @@ int main(int argc, const char *argv[]) {
goto Error;
}
} else if (!strcmp(argv[c], "-size") && c < argc - 1) {
config.target_size = ExUtilGetInt(argv[++c], 0, &parse_error);
config.target_size = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-psnr") && c < argc - 1) {
config.target_PSNR = ExUtilGetFloat(argv[++c], &parse_error);
config.target_PSNR = (float)strtod(argv[++c], NULL);
} else if (!strcmp(argv[c], "-sns") && c < argc - 1) {
config.sns_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
config.sns_strength = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-f") && c < argc - 1) {
config.filter_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
config.filter_strength = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-af")) {
config.autofilter = 1;
} else if (!strcmp(argv[c], "-jpeg_like")) {
@ -827,26 +772,34 @@ int main(int argc, const char *argv[]) {
} else if (!strcmp(argv[c], "-nostrong")) {
config.filter_type = 0;
} else if (!strcmp(argv[c], "-sharpness") && c < argc - 1) {
config.filter_sharpness = ExUtilGetInt(argv[++c], 0, &parse_error);
config.filter_sharpness = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-pass") && c < argc - 1) {
config.pass = ExUtilGetInt(argv[++c], 0, &parse_error);
config.pass = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-pre") && c < argc - 1) {
config.preprocessing = ExUtilGetInt(argv[++c], 0, &parse_error);
config.preprocessing = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-segments") && c < argc - 1) {
config.segments = ExUtilGetInt(argv[++c], 0, &parse_error);
config.segments = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-partition_limit") && c < argc - 1) {
config.partition_limit = ExUtilGetInt(argv[++c], 0, &parse_error);
config.partition_limit = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-map") && c < argc - 1) {
picture.extra_info_type = ExUtilGetInt(argv[++c], 0, &parse_error);
picture.extra_info_type = strtol(argv[++c], NULL, 0);
#ifdef WEBP_EXPERIMENTAL_FEATURES
} else if (!strcmp(argv[c], "-444")) {
picture.colorspace = WEBP_YUV444;
} else if (!strcmp(argv[c], "-422")) {
picture.colorspace = WEBP_YUV422;
} else if (!strcmp(argv[c], "-gray")) {
picture.colorspace = WEBP_YUV400;
#endif
} else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
crop = 1;
crop_x = ExUtilGetInt(argv[++c], 0, &parse_error);
crop_y = ExUtilGetInt(argv[++c], 0, &parse_error);
crop_w = ExUtilGetInt(argv[++c], 0, &parse_error);
crop_h = ExUtilGetInt(argv[++c], 0, &parse_error);
crop_x = strtol(argv[++c], NULL, 0);
crop_y = strtol(argv[++c], NULL, 0);
crop_w = strtol(argv[++c], NULL, 0);
crop_h = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-resize") && c < argc - 2) {
resize_w = ExUtilGetInt(argv[++c], 0, &parse_error);
resize_h = ExUtilGetInt(argv[++c], 0, &parse_error);
resize_w = strtol(argv[++c], NULL, 0);
resize_h = strtol(argv[++c], NULL, 0);
#ifndef WEBP_DLL
} else if (!strcmp(argv[c], "-noasm")) {
VP8GetCPUInfo = NULL;
@ -854,7 +807,7 @@ int main(int argc, const char *argv[]) {
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetEncoderVersion();
printf("%d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
return 0;
} else if (!strcmp(argv[c], "-progress")) {
show_progress = 1;
@ -931,9 +884,6 @@ int main(int argc, const char *argv[]) {
#endif
} else if (!strcmp(argv[c], "-v")) {
verbose = 1;
} else if (!strcmp(argv[c], "--")) {
if (c < argc - 1) in_file = argv[++c];
break;
} else if (argv[c][0] == '-') {
fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
HelpLong();
@ -941,11 +891,6 @@ int main(int argc, const char *argv[]) {
} else {
in_file = argv[c];
}
if (parse_error) {
HelpLong();
return -1;
}
}
if (in_file == NULL) {
fprintf(stderr, "No input file specified!\n");
@ -953,13 +898,6 @@ int main(int argc, const char *argv[]) {
goto Error;
}
if (use_lossless_preset == 1) {
if (!WebPConfigLosslessPreset(&config, lossless_preset)) {
fprintf(stderr, "Invalid lossless preset (-z %d)\n", lossless_preset);
goto Error;
}
}
// Check for unsupported command line options for lossless mode and log
// warning for such options.
if (!quiet && config.lossless == 1) {
@ -978,13 +916,9 @@ int main(int argc, const char *argv[]) {
goto Error;
}
// Read the input. We need to decide if we prefer ARGB or YUVA
// samples, depending on the expected compression mode (this saves
// some conversion steps).
picture.use_argb = (config.lossless || config.preprocessing > 0 ||
crop || (resize_w | resize_h) > 0);
// Read the input
if (verbose) {
StopwatchReset(&stop_watch);
StopwatchReadAndReset(&stop_watch);
}
if (!ReadPicture(in_file, &picture, keep_alpha,
(keep_metadata == 0) ? NULL : &metadata)) {
@ -992,9 +926,8 @@ int main(int argc, const char *argv[]) {
goto Error;
}
picture.progress_hook = (show_progress && !quiet) ? ProgressReport : NULL;
if (blend_alpha) {
WebPBlendAlpha(&picture, background_color);
if (keep_alpha == 2) {
WebPCleanupTransparentArea(&picture);
}
if (verbose) {
@ -1003,9 +936,8 @@ int main(int argc, const char *argv[]) {
}
// Open the output
if (out_file != NULL) {
const int use_stdout = !strcmp(out_file, "-");
out = use_stdout ? ExUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
if (out_file) {
out = fopen(out_file, "wb");
if (out == NULL) {
fprintf(stderr, "Error! Cannot open output file '%s'\n", out_file);
goto Error;
@ -1033,9 +965,9 @@ int main(int argc, const char *argv[]) {
picture.user_data = (void*)in_file;
}
// Crop & resize.
// Compress
if (verbose) {
StopwatchReset(&stop_watch);
StopwatchReadAndReset(&stop_watch);
}
if (crop != 0) {
// We use self-cropping using a view.
@ -1050,22 +982,12 @@ int main(int argc, const char *argv[]) {
goto Error;
}
}
if (verbose && (crop != 0 || (resize_w | resize_h) > 0)) {
const double preproc_time = StopwatchReadAndReset(&stop_watch);
fprintf(stderr, "Time to crop/resize picture: %.3fs\n", preproc_time);
}
if (picture.extra_info_type > 0) {
AllocExtraInfo(&picture);
}
if (print_distortion >= 0) { // Save original picture for later comparison
WebPPictureCopy(&picture, &original_picture);
}
// Compress.
if (verbose) {
StopwatchReset(&stop_watch);
}
if (!WebPEncode(&config, &picture)) {
fprintf(stderr, "Error! Cannot encode picture as WebP\n");
fprintf(stderr, "Error code: %d (%s)\n",
@ -1086,94 +1008,42 @@ int main(int argc, const char *argv[]) {
}
}
if (keep_metadata != 0) {
if (out != NULL) {
if (!WriteWebPWithMetadata(out, &picture, &memory_writer,
&metadata, keep_metadata, &metadata_written)) {
fprintf(stderr, "Error writing WebP file with metadata!\n");
goto Error;
}
} else { // output is disabled, just display the metadata stats.
const struct {
const MetadataPayload* const payload;
int flag;
} *iter, info[] = {
{ &metadata.exif, METADATA_EXIF },
{ &metadata.iccp, METADATA_ICC },
{ &metadata.xmp, METADATA_XMP },
{ NULL, 0 }
};
uint32_t unused1 = 0;
uint64_t unused2 = 0;
for (iter = info; iter->payload != NULL; ++iter) {
if (UpdateFlagsAndSize(iter->payload, !!(keep_metadata & iter->flag),
0, &unused1, &unused2)) {
metadata_written |= iter->flag;
}
}
if (keep_metadata != 0 && out != NULL) {
if (!WriteWebPWithMetadata(out, &picture, &memory_writer,
&metadata, keep_metadata, &metadata_written)) {
fprintf(stderr, "Error writing WebP file with metadata!\n");
goto Error;
}
}
if (!quiet) {
if (!short_output || print_distortion < 0) {
if (config.lossless) {
PrintExtraInfoLossless(&picture, short_output, in_file);
} else {
PrintExtraInfoLossy(&picture, short_output, config.low_memory, in_file);
}
}
if (!short_output && picture.extra_info_type > 0) {
PrintMapInfo(&picture);
}
if (print_distortion >= 0) { // print distortion
static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" };
float values[5];
if (picture.use_argb != original_picture.use_argb) {
// Somehow, the WebPEncode() call converted the original picture.
// We need to make both match before calling WebPPictureDistortion().
int ok = 0;
if (picture.use_argb) {
ok = WebPPictureYUVAToARGB(&original_picture);
} else {
ok = WebPPictureARGBToYUVA(&original_picture, WEBP_YUV420A);
}
if (!ok) {
fprintf(stderr, "Error while converting original picture.\n");
goto Error;
}
}
if (!WebPPictureDistortion(&picture, &original_picture,
print_distortion, values)) {
fprintf(stderr, "Error while computing the distortion.\n");
goto Error;
}
if (!short_output) {
fprintf(stderr, "%s: ", distortion_names[print_distortion]);
if (picture.use_argb) {
fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f Total:%.2f\n",
values[0], values[1], values[2], values[3], values[4]);
} else {
fprintf(stderr, "Y:%.2f U:%.2f V:%.2f A:%.2f Total:%.2f\n",
values[0], values[1], values[2], values[3], values[4]);
}
} else {
fprintf(stderr, "%7d %.4f\n", picture.stats->coded_size, values[4]);
}
if (config.lossless) {
PrintExtraInfoLossless(&picture, short_output, in_file);
} else {
PrintExtraInfoLossy(&picture, short_output, config.low_memory, in_file);
}
if (!short_output) {
PrintMetadataInfo(&metadata, metadata_written);
}
}
if (!quiet && !short_output && print_distortion >= 0) { // print distortion
static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" };
float values[5];
WebPPictureDistortion(&picture, &original_picture,
print_distortion, values);
fprintf(stderr, "%s: Y:%.2f U:%.2f V:%.2f A:%.2f Total:%.2f\n",
distortion_names[print_distortion],
values[0], values[1], values[2], values[3], values[4]);
}
return_value = 0;
Error:
WebPMemoryWriterClear(&memory_writer);
free(memory_writer.mem);
free(picture.extra_info);
MetadataFree(&metadata);
WebPPictureFree(&picture);
WebPPictureFree(&original_picture);
if (out != NULL && out != stdout) {
if (out != NULL) {
fclose(out);
}

View File

@ -17,12 +17,11 @@
#include <string.h>
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#ifdef WEBP_HAVE_PNG
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#endif
#ifdef HAVE_WINCODEC_H
@ -33,7 +32,6 @@
#define COBJMACROS
#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#include <ole2.h> // CreateStreamOnHGlobal()
#include <shlwapi.h>
#include <windows.h>
#include <wincodec.h>
@ -44,15 +42,14 @@
#include "./stopwatch.h"
static int verbose = 0;
static int quiet = 0;
#ifndef WEBP_DLL
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
extern void* VP8GetCPUInfo; // opaque forward declaration.
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif // WEBP_DLL
@ -65,8 +62,6 @@ typedef enum {
PAM,
PPM,
PGM,
BMP,
TIFF,
YUV,
ALPHA_PLANE_ONLY // this is for experimenting only
} OutputFileFormat;
@ -87,15 +82,9 @@ typedef enum {
#define MAKE_REFGUID(x) &(x)
#endif
static HRESULT CreateOutputStream(const char* out_file_name,
int write_to_mem, IStream** stream) {
static HRESULT CreateOutputStream(const char* out_file_name, IStream** stream) {
HRESULT hr = S_OK;
if (write_to_mem) {
// Output to a memory buffer. This is freed when 'stream' is released.
IFS(CreateStreamOnHGlobal(NULL, TRUE, stream));
} else {
IFS(SHCreateStreamOnFileA(out_file_name, STGM_WRITE | STGM_CREATE, stream));
}
IFS(SHCreateStreamOnFileA(out_file_name, STGM_WRITE | STGM_CREATE, stream));
if (FAILED(hr)) {
fprintf(stderr, "Error opening output file %s (%08lx)\n",
out_file_name, hr);
@ -103,9 +92,8 @@ static HRESULT CreateOutputStream(const char* out_file_name,
return hr;
}
static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
REFGUID container_guid,
uint8_t* rgb, int stride,
static HRESULT WriteUsingWIC(const char* out_file_name, REFGUID container_guid,
unsigned char* rgb, int stride,
uint32_t width, uint32_t height, int has_alpha) {
HRESULT hr = S_OK;
IWICImagingFactory* factory = NULL;
@ -126,7 +114,7 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
"Windows XP SP3 or newer?). PNG support not available. "
"Use -ppm or -pgm for available PPM and PGM formats.\n");
}
IFS(CreateOutputStream(out_file_name, use_stdout, &stream));
IFS(CreateOutputStream(out_file_name, &stream));
IFS(IWICImagingFactory_CreateEncoder(factory, container_guid, NULL,
&encoder));
IFS(IWICBitmapEncoder_Initialize(encoder, stream,
@ -140,28 +128,6 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
IFS(IWICBitmapFrameEncode_Commit(frame));
IFS(IWICBitmapEncoder_Commit(encoder));
if (SUCCEEDED(hr) && use_stdout) {
HGLOBAL image;
IFS(GetHGlobalFromStream(stream, &image));
if (SUCCEEDED(hr)) {
HANDLE std_output = GetStdHandle(STD_OUTPUT_HANDLE);
DWORD mode;
const BOOL update_mode = GetConsoleMode(std_output, &mode);
const void* const image_mem = GlobalLock(image);
DWORD bytes_written = 0;
// Clear output processing if necessary, then output the image.
if (update_mode) SetConsoleMode(std_output, 0);
if (!WriteFile(std_output, image_mem, (DWORD)GlobalSize(image),
&bytes_written, NULL) ||
bytes_written != GlobalSize(image)) {
hr = E_FAIL;
}
if (update_mode) SetConsoleMode(std_output, mode);
GlobalUnlock(image);
}
}
if (frame != NULL) IUnknown_Release(frame);
if (encoder != NULL) IUnknown_Release(encoder);
if (factory != NULL) IUnknown_Release(factory);
@ -169,21 +135,21 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
return hr;
}
static int WritePNG(const char* out_file_name, int use_stdout,
static int WritePNG(const char* out_file_name,
const WebPDecBuffer* const buffer) {
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
uint8_t* const rgb = buffer->u.RGBA.rgba;
unsigned char* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const int has_alpha = (buffer->colorspace == MODE_BGRA);
return SUCCEEDED(WriteUsingWIC(out_file_name, use_stdout,
return SUCCEEDED(WriteUsingWIC(out_file_name,
MAKE_REFGUID(GUID_ContainerFormatPng),
rgb, stride, width, height, has_alpha));
}
#elif defined(WEBP_HAVE_PNG) // !HAVE_WINCODEC_H
static void PNGAPI PNGErrorFunction(png_structp png, png_const_charp dummy) {
static void PNGAPI error_function(png_structp png, png_const_charp dummy) {
(void)dummy; // remove variable-unused warning
longjmp(png_jmpbuf(png), 1);
}
@ -191,25 +157,25 @@ static void PNGAPI PNGErrorFunction(png_structp png, png_const_charp dummy) {
static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
uint8_t* const rgb = buffer->u.RGBA.rgba;
unsigned char* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const int has_alpha = (buffer->colorspace == MODE_RGBA);
volatile png_structp png;
volatile png_infop info;
png_structp png;
png_infop info;
png_uint_32 y;
png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
NULL, PNGErrorFunction, NULL);
NULL, error_function, NULL);
if (png == NULL) {
return 0;
}
info = png_create_info_struct(png);
if (info == NULL) {
png_destroy_write_struct((png_structpp)&png, NULL);
png_destroy_write_struct(&png, NULL);
return 0;
}
if (setjmp(png_jmpbuf(png))) {
png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
png_destroy_write_struct(&png, &info);
return 0;
}
png_init_io(png, out_file);
@ -223,7 +189,7 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
png_write_rows(png, &row, 1);
}
png_write_end(png, info);
png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
png_destroy_write_struct(&png, &info);
return 1;
}
#else // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
@ -240,16 +206,16 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgb = buffer->u.RGBA.rgba;
const unsigned char* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const size_t bytes_per_px = alpha ? 4 : 3;
uint32_t y;
if (alpha) {
fprintf(fout, "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
fprintf(fout, "P7\nWIDTH %d\nHEIGHT %d\nDEPTH 4\nMAXVAL 255\n"
"TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
} else {
fprintf(fout, "P6\n%u %u\n255\n", width, height);
fprintf(fout, "P6\n%d %d\n255\n", width, height);
}
for (y = 0; y < height; ++y) {
if (fwrite(rgb + y * stride, width, bytes_per_px, fout) != bytes_per_px) {
@ -259,154 +225,14 @@ static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
return 1;
}
static void PutLE16(uint8_t* const dst, uint32_t value) {
dst[0] = (value >> 0) & 0xff;
dst[1] = (value >> 8) & 0xff;
}
static void PutLE32(uint8_t* const dst, uint32_t value) {
PutLE16(dst + 0, (value >> 0) & 0xffff);
PutLE16(dst + 2, (value >> 16) & 0xffff);
}
#define BMP_HEADER_SIZE 54
static int WriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
const int has_alpha = (buffer->colorspace != MODE_BGR);
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgba = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const uint32_t bytes_per_px = has_alpha ? 4 : 3;
uint32_t y;
const uint32_t line_size = bytes_per_px * width;
const uint32_t bmp_stride = (line_size + 3) & ~3; // pad to 4
const uint32_t total_size = bmp_stride * height + BMP_HEADER_SIZE;
uint8_t bmp_header[BMP_HEADER_SIZE] = { 0 };
// bitmap file header
PutLE16(bmp_header + 0, 0x4d42); // signature 'BM'
PutLE32(bmp_header + 2, total_size); // size including header
PutLE32(bmp_header + 6, 0); // reserved
PutLE32(bmp_header + 10, BMP_HEADER_SIZE); // offset to pixel array
// bitmap info header
PutLE32(bmp_header + 14, 40); // DIB header size
PutLE32(bmp_header + 18, width); // dimensions
PutLE32(bmp_header + 22, -(int)height); // vertical flip!
PutLE16(bmp_header + 26, 1); // number of planes
PutLE16(bmp_header + 28, bytes_per_px * 8); // bits per pixel
PutLE32(bmp_header + 30, 0); // no compression (BI_RGB)
PutLE32(bmp_header + 34, 0); // image size (dummy)
PutLE32(bmp_header + 38, 2400); // x pixels/meter
PutLE32(bmp_header + 42, 2400); // y pixels/meter
PutLE32(bmp_header + 46, 0); // number of palette colors
PutLE32(bmp_header + 50, 0); // important color count
// TODO(skal): color profile
// write header
if (fwrite(bmp_header, sizeof(bmp_header), 1, fout) != 1) {
return 0;
}
// write pixel array
for (y = 0; y < height; ++y) {
if (fwrite(rgba + y * stride, line_size, 1, fout) != 1) {
return 0;
}
// write padding zeroes
if (bmp_stride != line_size) {
const uint8_t zeroes[3] = { 0 };
if (fwrite(zeroes, bmp_stride - line_size, 1, fout) != 1) {
return 0;
}
}
}
return 1;
}
#undef BMP_HEADER_SIZE
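Two details of the BMP writer above are worth calling out: each row is padded
up to a multiple of 4 bytes via (line_size + 3) & ~3, and the height is stored
negated so the rows are interpreted top-down and the decoded RGBA buffer can be
written in its natural order. A tiny illustration of the padding arithmetic
(example widths only):

#include <stdio.h>
#include <stdint.h>

int main(void) {
  const uint32_t widths[] = { 1, 2, 5, 640 };   // arbitrary example widths
  size_t i;
  for (i = 0; i < sizeof(widths) / sizeof(widths[0]); ++i) {
    const uint32_t line_size = 3 * widths[i];            // 3 bytes per pixel
    const uint32_t bmp_stride = (line_size + 3) & ~3u;   // rounded up to 4
    printf("width=%4u  line_size=%4u  bmp_stride=%4u\n",
           widths[i], line_size, bmp_stride);
  }
  return 0;   // e.g. width=5 gives line_size=15, bmp_stride=16
}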
#define NUM_IFD_ENTRIES 15
#define EXTRA_DATA_SIZE 16
// 10b for signature/header + n * 12b entries + 4b for IFD terminator:
#define EXTRA_DATA_OFFSET (10 + 12 * NUM_IFD_ENTRIES + 4)
#define TIFF_HEADER_SIZE (EXTRA_DATA_OFFSET + EXTRA_DATA_SIZE)
static int WriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
const int has_alpha = (buffer->colorspace != MODE_RGB);
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgba = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const uint8_t bytes_per_px = has_alpha ? 4 : 3;
// For non-alpha case, we omit tag 0x152 (ExtraSamples).
const uint8_t num_ifd_entries = has_alpha ? NUM_IFD_ENTRIES
: NUM_IFD_ENTRIES - 1;
uint8_t tiff_header[TIFF_HEADER_SIZE] = {
0x49, 0x49, 0x2a, 0x00, // little endian signature
8, 0, 0, 0, // offset to the unique IFD that follows
// IFD (offset = 8). Entries must be written in increasing tag order.
num_ifd_entries, 0, // Number of entries in the IFD (12 bytes each).
0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 10: Width (TBD)
0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 22: Height (TBD)
0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0, // 34: BitsPerSample: 8888
EXTRA_DATA_OFFSET + 0, 0, 0, 0,
0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 46: Compression: none
0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 58: Photometric: RGB
0x11, 0x01, 4, 0, 1, 0, 0, 0, // 70: Strips offset:
TIFF_HEADER_SIZE, 0, 0, 0, // data follows header
0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 82: Orientation: topleft
0x15, 0x01, 3, 0, 1, 0, 0, 0, // 94: SamplesPerPixels
bytes_per_px, 0, 0, 0,
0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 106: Rows per strip (TBD)
0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 118: StripByteCount (TBD)
0x1a, 0x01, 5, 0, 1, 0, 0, 0, // 130: X-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0,
0x1b, 0x01, 5, 0, 1, 0, 0, 0, // 142: Y-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0,
0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 154: PlanarConfiguration
0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 166: ResolutionUnit (inch)
0x52, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 178: ExtraSamples: rgbA
0, 0, 0, 0, // 190: IFD terminator
// EXTRA_DATA_OFFSET:
8, 0, 8, 0, 8, 0, 8, 0, // BitsPerSample
72, 0, 0, 0, 1, 0, 0, 0 // 72 pixels/inch, for X/Y-resolution
};
uint32_t y;
// Fill placeholders in IFD:
PutLE32(tiff_header + 10 + 8, width);
PutLE32(tiff_header + 22 + 8, height);
PutLE32(tiff_header + 106 + 8, height);
PutLE32(tiff_header + 118 + 8, width * bytes_per_px * height);
if (!has_alpha) PutLE32(tiff_header + 178, 0); // IFD terminator
// write header
if (fwrite(tiff_header, sizeof(tiff_header), 1, fout) != 1) {
return 0;
}
// write pixel values
for (y = 0; y < height; ++y) {
if (fwrite(rgba + y * stride, bytes_per_px, width, fout) != width) {
return 0;
}
}
return 1;
}
#undef TIFF_HEADER_SIZE
#undef EXTRA_DATA_OFFSET
#undef EXTRA_DATA_SIZE
#undef NUM_IFD_ENTRIES
static int WriteAlphaPlane(FILE* fout, const WebPDecBuffer* const buffer) {
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const a = buffer->u.YUVA.a;
const unsigned char* const a = buffer->u.YUVA.a;
const int a_stride = buffer->u.YUVA.a_stride;
uint32_t y;
assert(a != NULL);
fprintf(fout, "P5\n%u %u\n255\n", width, height);
fprintf(fout, "P5\n%d %d\n255\n", width, height);
for (y = 0; y < height; ++y) {
if (fwrite(a + y * a_stride, width, 1, fout) != 1) {
return 0;
@ -463,33 +289,30 @@ static int WritePGMOrYUV(FILE* fout, const WebPDecBuffer* const buffer,
return ok;
}
static int SaveOutput(const WebPDecBuffer* const buffer,
OutputFileFormat format, const char* const out_file) {
static void SaveOutput(const WebPDecBuffer* const buffer,
OutputFileFormat format, const char* const out_file) {
FILE* fout = NULL;
int needs_open_file = 1;
const int use_stdout = !strcmp(out_file, "-");
int ok = 1;
Stopwatch stop_watch;
if (verbose) {
StopwatchReset(&stop_watch);
}
if (verbose)
StopwatchReadAndReset(&stop_watch);
#ifdef HAVE_WINCODEC_H
needs_open_file = (format != PNG);
#endif
if (needs_open_file) {
fout = use_stdout ? ExUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
if (fout == NULL) {
fout = fopen(out_file, "wb");
if (!fout) {
fprintf(stderr, "Error opening output file %s\n", out_file);
return 0;
return;
}
}
if (format == PNG) {
#ifdef HAVE_WINCODEC_H
ok &= WritePNG(out_file, use_stdout, buffer);
ok &= WritePNG(out_file, buffer);
#else
ok &= WritePNG(fout, buffer);
#endif
@ -497,38 +320,23 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
ok &= WritePPM(fout, buffer, 1);
} else if (format == PPM) {
ok &= WritePPM(fout, buffer, 0);
} else if (format == BMP) {
ok &= WriteBMP(fout, buffer);
} else if (format == TIFF) {
ok &= WriteTIFF(fout, buffer);
} else if (format == PGM || format == YUV) {
ok &= WritePGMOrYUV(fout, buffer, format);
} else if (format == ALPHA_PLANE_ONLY) {
ok &= WriteAlphaPlane(fout, buffer);
}
if (fout != NULL && fout != stdout) {
if (fout) {
fclose(fout);
}
if (ok) {
if (!quiet) {
if (use_stdout) {
fprintf(stderr, "Saved to stdout\n");
} else {
fprintf(stderr, "Saved file %s\n", out_file);
}
}
printf("Saved file %s\n", out_file);
if (verbose) {
const double write_time = StopwatchReadAndReset(&stop_watch);
fprintf(stderr, "Time to write output: %.3fs\n", write_time);
printf("Time to write output: %.3fs\n", write_time);
}
} else {
if (use_stdout) {
fprintf(stderr, "Error writing to stdout !!\n");
} else {
fprintf(stderr, "Error writing file %s !!\n", out_file);
}
fprintf(stderr, "Error writing file %s !!\n", out_file);
}
return ok;
}
static void Help(void) {
@ -537,40 +345,32 @@ static void Help(void) {
"Use following options to convert into alternate image formats:\n"
" -pam ......... save the raw RGBA samples as a color PAM\n"
" -ppm ......... save the raw RGB samples as a color PPM\n"
" -bmp ......... save as uncompressed BMP format\n"
" -tiff ........ save as uncompressed TIFF format\n"
" -pgm ......... save the raw YUV samples as a grayscale PGM\n"
" file with IMC4 layout\n"
" -yuv ......... save the raw YUV samples in flat layout\n"
" file with IMC4 layout.\n"
" -yuv ......... save the raw YUV samples in flat layout.\n"
"\n"
" Other options are:\n"
" -version .... print version number and exit\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
" -nodither .... disable dithering\n"
" -dither <d> .. dithering strength (in 0..100)\n"
" -alpha_dither use alpha-plane dithering if needed\n"
" -version .... print version number and exit.\n"
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
" -nofilter .... disable in-loop filtering.\n"
" -mt .......... use multi-threading\n"
" -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
" -resize <w> <h> ......... scale the output (*after* any cropping)\n"
" -flip ........ flip the output vertically\n"
" -alpha ....... only save the alpha plane\n"
" -incremental . use incremental decoding (useful for tests)\n"
" -h ....... this help message\n"
" -scale <w> <h> .......... scale the output (*after* any cropping)\n"
" -alpha ....... only save the alpha plane.\n"
" -h ....... this help message.\n"
" -v ....... verbose (e.g. print encoding/decoding times)\n"
" -quiet ....... quiet mode, don't print anything\n"
#ifndef WEBP_DLL
" -noasm ....... disable all assembly optimizations\n"
" -noasm ....... disable all assembly optimizations.\n"
#endif
);
}
static const char* const kFormatType[] = {
"unspecified", "lossy", "lossless"
static const char* const kStatusMessages[] = {
"OK", "OUT_OF_MEMORY", "INVALID_PARAM", "BITSTREAM_ERROR",
"UNSUPPORTED_FEATURE", "SUSPENDED", "USER_ABORT", "NOT_ENOUGH_DATA"
};
int main(int argc, const char *argv[]) {
int ok = 0;
const char *in_file = NULL;
const char *out_file = NULL;
@ -578,7 +378,6 @@ int main(int argc, const char *argv[]) {
WebPDecBuffer* const output_buffer = &config.output;
WebPBitstreamFeatures* const bitstream = &config.input;
OutputFileFormat format = PNG;
int incremental = 0;
int c;
if (!WebPInitDecoderConfig(&config)) {
@ -587,7 +386,6 @@ int main(int argc, const char *argv[]) {
}
for (c = 1; c < argc; ++c) {
int parse_error = 0;
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
Help();
return 0;
@ -603,12 +401,6 @@ int main(int argc, const char *argv[]) {
format = PAM;
} else if (!strcmp(argv[c], "-ppm")) {
format = PPM;
} else if (!strcmp(argv[c], "-bmp")) {
format = BMP;
} else if (!strcmp(argv[c], "-tiff")) {
format = TIFF;
} else if (!strcmp(argv[c], "-quiet")) {
quiet = 1;
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetDecoderVersion();
printf("%d.%d.%d\n",
@ -620,37 +412,22 @@ int main(int argc, const char *argv[]) {
format = YUV;
} else if (!strcmp(argv[c], "-mt")) {
config.options.use_threads = 1;
} else if (!strcmp(argv[c], "-alpha_dither")) {
config.options.alpha_dithering_strength = 100;
} else if (!strcmp(argv[c], "-nodither")) {
config.options.dithering_strength = 0;
} else if (!strcmp(argv[c], "-dither") && c < argc - 1) {
config.options.dithering_strength =
ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
config.options.use_cropping = 1;
config.options.crop_left = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_top = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_height = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if ((!strcmp(argv[c], "-scale") || !strcmp(argv[c], "-resize")) &&
c < argc - 2) { // '-scale' is left for compatibility
config.options.crop_left = strtol(argv[++c], NULL, 0);
config.options.crop_top = strtol(argv[++c], NULL, 0);
config.options.crop_width = strtol(argv[++c], NULL, 0);
config.options.crop_height = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-scale") && c < argc - 2) {
config.options.use_scaling = 1;
config.options.scaled_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.scaled_height = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-flip")) {
config.options.flip = 1;
config.options.scaled_width = strtol(argv[++c], NULL, 0);
config.options.scaled_height = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-v")) {
verbose = 1;
#ifndef WEBP_DLL
} else if (!strcmp(argv[c], "-noasm")) {
VP8GetCPUInfo = NULL;
#endif
} else if (!strcmp(argv[c], "-incremental")) {
incremental = 1;
} else if (!strcmp(argv[c], "--")) {
if (c < argc - 1) in_file = argv[++c];
break;
} else if (argv[c][0] == '-') {
fprintf(stderr, "Unknown option '%s'\n", argv[c]);
Help();
@ -658,11 +435,6 @@ int main(int argc, const char *argv[]) {
} else {
in_file = argv[c];
}
if (parse_error) {
Help();
return -1;
}
}
if (in_file == NULL) {
@ -671,14 +443,28 @@ int main(int argc, const char *argv[]) {
return -1;
}
if (quiet) verbose = 0;
{
Stopwatch stop_watch;
VP8StatusCode status = VP8_STATUS_OK;
int ok;
size_t data_size = 0;
const uint8_t* data = NULL;
if (!ExUtilLoadWebP(in_file, &data, &data_size, bitstream)) {
return -1;
if (!ExUtilReadFile(in_file, &data, &data_size)) return -1;
if (verbose)
StopwatchReadAndReset(&stop_watch);
status = WebPGetFeatures(data, data_size, bitstream);
if (status != VP8_STATUS_OK) {
goto end;
}
if (bitstream->has_animation) {
fprintf(stderr,
"Error! Decoding of an animated WebP file is not supported.\n"
" Use webpmux to extract the individual frames or\n"
" vwebp to view this image.\n");
}
switch (format) {
@ -695,13 +481,6 @@ int main(int argc, const char *argv[]) {
case PPM:
output_buffer->colorspace = MODE_RGB; // drops alpha for PPM
break;
case BMP:
output_buffer->colorspace = bitstream->has_alpha ? MODE_BGRA : MODE_BGR;
break;
case TIFF: // note: force pre-multiplied alpha
output_buffer->colorspace =
bitstream->has_alpha ? MODE_rgbA : MODE_RGB;
break;
case PGM:
case YUV:
output_buffer->colorspace = bitstream->has_alpha ? MODE_YUVA : MODE_YUV;
@ -713,44 +492,36 @@ int main(int argc, const char *argv[]) {
free((void*)data);
return -1;
}
status = WebPDecode(data, data_size, &config);
if (incremental) {
status = ExUtilDecodeWebPIncremental(data, data_size, verbose, &config);
} else {
status = ExUtilDecodeWebP(data, data_size, verbose, &config);
if (verbose) {
const double decode_time = StopwatchReadAndReset(&stop_watch);
printf("Time to decode picture: %.3fs\n", decode_time);
}
end:
free((void*)data);
ok = (status == VP8_STATUS_OK);
if (!ok) {
ExUtilPrintWebPError(in_file, status);
goto Exit;
fprintf(stderr, "Decoding of %s failed.\n", in_file);
fprintf(stderr, "Status: %d (%s)\n", status, kStatusMessages[status]);
return -1;
}
}
if (out_file != NULL) {
if (!quiet) {
fprintf(stderr, "Decoded %s. Dimensions: %d x %d %s. Format: %s. "
"Now saving...\n",
in_file, output_buffer->width, output_buffer->height,
bitstream->has_alpha ? " (with alpha)" : "",
kFormatType[bitstream->format]);
}
ok = SaveOutput(output_buffer, format, out_file);
if (out_file) {
printf("Decoded %s. Dimensions: %d x %d%s. Now saving...\n", in_file,
output_buffer->width, output_buffer->height,
bitstream->has_alpha ? " (with alpha)" : "");
SaveOutput(output_buffer, format, out_file);
} else {
if (!quiet) {
fprintf(stderr, "File %s can be decoded "
"(dimensions: %d x %d %s. Format: %s).\n",
in_file, output_buffer->width, output_buffer->height,
bitstream->has_alpha ? " (with alpha)" : "",
kFormatType[bitstream->format]);
fprintf(stderr, "Nothing written; "
"use -o flag to save the result as e.g. PNG.\n");
}
printf("File %s can be decoded (dimensions: %d x %d)%s.\n",
in_file, output_buffer->width, output_buffer->height,
bitstream->has_alpha ? " (with alpha)" : "");
printf("Nothing written; use -o flag to save the result as e.g. PNG.\n");
}
Exit:
WebPFreeDecBuffer(output_buffer);
return ok ? 0 : -1;
return 0;
}
//------------------------------------------------------------------------------

View File

@ -11,104 +11,24 @@
//
#include "./example_util.h"
#if defined(_WIN32)
#include <fcntl.h> // for _O_BINARY
#include <io.h> // for _setmode()
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "webp/decode.h"
#include "./stopwatch.h"
//------------------------------------------------------------------------------
// String parsing
uint32_t ExUtilGetUInt(const char* const v, int base, int* const error) {
char* end = NULL;
const uint32_t n = (v != NULL) ? (uint32_t)strtoul(v, &end, base) : 0u;
if (end == v && error != NULL && !*error) {
*error = 1;
fprintf(stderr, "Error! '%s' is not an integer.\n",
(v != NULL) ? v : "(null)");
}
return n;
}
int ExUtilGetInt(const char* const v, int base, int* const error) {
return (int)ExUtilGetUInt(v, base, error);
}
float ExUtilGetFloat(const char* const v, int* const error) {
char* end = NULL;
const float f = (v != NULL) ? (float)strtod(v, &end) : 0.f;
if (end == v && error != NULL && !*error) {
*error = 1;
fprintf(stderr, "Error! '%s' is not a floating point number.\n",
(v != NULL) ? v : "(null)");
}
return f;
}
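The three parsing helpers above share a sticky 'error' flag: the caller passes
the same int to every call and checks it once after the option loop, which is
how the cwebp/dwebp option parsing uses them. A condensed, self-contained
sketch of that pattern (the option values are made up):

#include <stdio.h>
#include "./example_util.h"   // assumed to declare the ExUtilGet* helpers

int main(void) {
  int parse_error = 0;
  const int   q    = ExUtilGetInt("75", 0, &parse_error);            // ok
  const float psnr = ExUtilGetFloat("42.0", &parse_error);           // ok
  const int   bad  = ExUtilGetInt("not-a-number", 0, &parse_error);  // sets flag
  if (parse_error) {
    fprintf(stderr, "bad option value\n");   // checked once, as in main()
    return 1;
  }
  printf("q=%d psnr=%.1f bad=%d\n", q, psnr, bad);
  return 0;
}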
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
// -----------------------------------------------------------------------------
// File I/O
FILE* ExUtilSetBinaryMode(FILE* file) {
#if defined(_WIN32)
if (_setmode(_fileno(file), _O_BINARY) == -1) {
fprintf(stderr, "Failed to reopen file in O_BINARY mode.\n");
return NULL;
}
#endif
return file;
}
int ExUtilReadFromStdin(const uint8_t** data, size_t* data_size) {
static const size_t kBlockSize = 16384; // default initial size
size_t max_size = 0;
size_t size = 0;
uint8_t* input = NULL;
if (data == NULL || data_size == NULL) return 0;
*data = NULL;
*data_size = 0;
if (!ExUtilSetBinaryMode(stdin)) return 0;
while (!feof(stdin)) {
// We double the buffer size each time and read as much as possible.
const size_t extra_size = (max_size == 0) ? kBlockSize : max_size;
void* const new_data = realloc(input, max_size + extra_size);
if (new_data == NULL) goto Error;
input = (uint8_t*)new_data;
max_size += extra_size;
size += fread(input + size, 1, extra_size, stdin);
if (size < max_size) break;
}
if (ferror(stdin)) goto Error;
*data = input;
*data_size = size;
return 1;
Error:
free(input);
fprintf(stderr, "Could not read from stdin\n");
return 0;
}
int ExUtilReadFile(const char* const file_name,
const uint8_t** data, size_t* data_size) {
int ok;
void* file_data;
size_t file_size;
FILE* in;
const int from_stdin = (file_name == NULL) || !strcmp(file_name, "-");
if (from_stdin) return ExUtilReadFromStdin(data, data_size);
if (data == NULL || data_size == NULL) return 0;
if (file_name == NULL || data == NULL || data_size == NULL) return 0;
*data = NULL;
*data_size = 0;
@ -140,131 +60,20 @@ int ExUtilWriteFile(const char* const file_name,
const uint8_t* data, size_t data_size) {
int ok;
FILE* out;
const int to_stdout = (file_name == NULL) || !strcmp(file_name, "-");
if (data == NULL) {
if (file_name == NULL || data == NULL) {
return 0;
}
out = to_stdout ? stdout : fopen(file_name, "wb");
out = fopen(file_name, "wb");
if (out == NULL) {
fprintf(stderr, "Error! Cannot open output file '%s'\n", file_name);
return 0;
}
ok = (fwrite(data, data_size, 1, out) == 1);
if (out != stdout) fclose(out);
fclose(out);
return ok;
}
//------------------------------------------------------------------------------
// WebP decoding
static const char* const kStatusMessages[VP8_STATUS_NOT_ENOUGH_DATA + 1] = {
"OK", "OUT_OF_MEMORY", "INVALID_PARAM", "BITSTREAM_ERROR",
"UNSUPPORTED_FEATURE", "SUSPENDED", "USER_ABORT", "NOT_ENOUGH_DATA"
};
static void PrintAnimationWarning(const WebPDecoderConfig* const config) {
if (config->input.has_animation) {
fprintf(stderr,
"Error! Decoding of an animated WebP file is not supported.\n"
" Use webpmux to extract the individual frames or\n"
" vwebp to view this image.\n");
}
}
void ExUtilPrintWebPError(const char* const in_file, int status) {
fprintf(stderr, "Decoding of %s failed.\n", in_file);
fprintf(stderr, "Status: %d", status);
if (status >= VP8_STATUS_OK && status <= VP8_STATUS_NOT_ENOUGH_DATA) {
fprintf(stderr, "(%s)", kStatusMessages[status]);
}
fprintf(stderr, "\n");
}
int ExUtilLoadWebP(const char* const in_file,
const uint8_t** data, size_t* data_size,
WebPBitstreamFeatures* bitstream) {
VP8StatusCode status;
WebPBitstreamFeatures local_features;
if (!ExUtilReadFile(in_file, data, data_size)) return 0;
if (bitstream == NULL) {
bitstream = &local_features;
}
status = WebPGetFeatures(*data, *data_size, bitstream);
if (status != VP8_STATUS_OK) {
free((void*)*data);
*data = NULL;
*data_size = 0;
ExUtilPrintWebPError(in_file, status);
return 0;
}
return 1;
}
//------------------------------------------------------------------------------
VP8StatusCode ExUtilDecodeWebP(const uint8_t* const data, size_t data_size,
int verbose, WebPDecoderConfig* const config) {
Stopwatch stop_watch;
VP8StatusCode status = VP8_STATUS_OK;
if (config == NULL) return VP8_STATUS_INVALID_PARAM;
PrintAnimationWarning(config);
StopwatchReset(&stop_watch);
// Decoding call.
status = WebPDecode(data, data_size, config);
if (verbose) {
const double decode_time = StopwatchReadAndReset(&stop_watch);
fprintf(stderr, "Time to decode picture: %.3fs\n", decode_time);
}
return status;
}
VP8StatusCode ExUtilDecodeWebPIncremental(
const uint8_t* const data, size_t data_size,
int verbose, WebPDecoderConfig* const config) {
Stopwatch stop_watch;
VP8StatusCode status = VP8_STATUS_OK;
if (config == NULL) return VP8_STATUS_INVALID_PARAM;
PrintAnimationWarning(config);
StopwatchReset(&stop_watch);
// Decoding call.
{
WebPIDecoder* const idec = WebPIDecode(data, data_size, config);
if (idec == NULL) {
fprintf(stderr, "Failed during WebPINewDecoder().\n");
return VP8_STATUS_OUT_OF_MEMORY;
} else {
#ifdef WEBP_EXPERIMENTAL_FEATURES
size_t size = 0;
const size_t incr = 2 + (data_size / 20);
while (size < data_size) {
size_t next_size = size + (rand() % incr);
if (next_size > data_size) next_size = data_size;
status = WebPIUpdate(idec, data, next_size);
if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) break;
size = next_size;
}
#else
status = WebPIUpdate(idec, data, data_size);
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
WebPIDelete(idec);
}
}
if (verbose) {
const double decode_time = StopwatchReadAndReset(&stop_watch);
fprintf(stderr, "Time to decode picture: %.3fs\n", decode_time);
}
return status;
}
// -----------------------------------------------------------------------------

examples/example_util.h
View File

@ -13,76 +13,23 @@
#ifndef WEBP_EXAMPLES_EXAMPLE_UTIL_H_
#define WEBP_EXAMPLES_EXAMPLE_UTIL_H_
#include <stdio.h>
#include "webp/decode.h"
#include "webp/types.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// String parsing
// Parses 'v' using strto(ul|l|d)(). If error is non-NULL, '*error' is set to
// true on failure while on success it is left unmodified to allow chaining of
// calls. An error is only printed on the first occurrence.
uint32_t ExUtilGetUInt(const char* const v, int base, int* const error);
int ExUtilGetInt(const char* const v, int base, int* const error);
float ExUtilGetFloat(const char* const v, int* const error);
//------------------------------------------------------------------------------
// File I/O
// Reopen file in binary (O_BINARY) mode.
// Returns 'file' on success, NULL otherwise.
FILE* ExUtilSetBinaryMode(FILE* file);
// Allocates storage for entire file 'file_name' and returns contents and size
// in 'data' and 'data_size'. Returns 1 on success, 0 otherwise. '*data' should
// be deleted using free().
// If 'file_name' is NULL or equal to "-", input is read from stdin by calling
// the function ExUtilReadFromStdin().
int ExUtilReadFile(const char* const file_name,
const uint8_t** data, size_t* data_size);
// Same as ExUtilReadFile(), but reads until EOF from stdin instead.
int ExUtilReadFromStdin(const uint8_t** data, size_t* data_size);
// Write a data segment into a file named 'file_name'. Returns true if ok.
// If 'file_name' is NULL or equal to "-", output is written to stdout.
int ExUtilWriteFile(const char* const file_name,
const uint8_t* data, size_t data_size);
//------------------------------------------------------------------------------
// WebP decoding
// Prints an informative error message regarding decode failure of 'in_file'.
// 'status' is treated as a VP8StatusCode and if valid will be printed as a
// text string.
void ExUtilPrintWebPError(const char* const in_file, int status);
// Reads a WebP from 'in_file', returning the contents and size in 'data' and
// 'data_size'. If not NULL, 'bitstream' is populated using WebPGetFeatures().
// Returns true on success.
int ExUtilLoadWebP(const char* const in_file,
const uint8_t** data, size_t* data_size,
WebPBitstreamFeatures* bitstream);
// Decodes the WebP contained in 'data'.
// 'config' is a structure previously initialized by WebPInitDecoderConfig().
// 'config->output' should have the desired colorspace selected. 'verbose' will
// cause decode timing to be reported.
// Returns the decoder status. On success 'config->output' will contain the
// decoded picture.
VP8StatusCode ExUtilDecodeWebP(const uint8_t* const data, size_t data_size,
int verbose, WebPDecoderConfig* const config);
// Same as ExUtilDecodeWebP(), but using the incremental decoder.
VP8StatusCode ExUtilDecodeWebPIncremental(
const uint8_t* const data, size_t data_size,
int verbose, WebPDecoderConfig* const config);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
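// Illustrative usage sketch (hedged; not taken from the libwebp tree): how the
// newer ExUtil* decode helpers declared above fit together, following the dwebp
// flow shown earlier in this diff. 'DecodeToRGB' is a hypothetical wrapper name.
#include <stdlib.h>
#include "./example_util.h"
#include "webp/decode.h"

static int DecodeToRGB(const char* const in_file, int incremental,
                       int verbose) {
  const uint8_t* data = NULL;
  size_t data_size = 0;
  WebPDecoderConfig config;
  VP8StatusCode status;
  if (!WebPInitDecoderConfig(&config)) return 0;
  // Reads the whole file (or stdin if in_file is "-") and fills config.input.
  if (!ExUtilLoadWebP(in_file, &data, &data_size, &config.input)) return 0;
  config.output.colorspace = config.input.has_alpha ? MODE_RGBA : MODE_RGB;
  status = incremental
      ? ExUtilDecodeWebPIncremental(data, data_size, verbose, &config)
      : ExUtilDecodeWebP(data, data_size, verbose, &config);
  if (status != VP8_STATUS_OK) ExUtilPrintWebPError(in_file, status);
  WebPFreeDecBuffer(&config.output);  // release pixels owned by the decoder
  free((void*)data);
  return (status == VP8_STATUS_OK);
}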

examples/gif2webp.c
View File

@ -18,22 +18,140 @@
#include <string.h>
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#ifdef WEBP_HAVE_GIF
#include <gif_lib.h>
#include "webp/encode.h"
#include "webp/mux.h"
#include "./example_util.h"
#include "./gifdec.h"
#define GIF_TRANSPARENT_MASK 0x01
#define GIF_DISPOSE_MASK 0x07
#define GIF_DISPOSE_SHIFT 2
#define TRANSPARENT_COLOR 0x00ffffff
#define WHITE_COLOR 0xffffffff
//------------------------------------------------------------------------------
static int transparent_index = GIF_INDEX_INVALID; // Opaque by default.
static int transparent_index = -1; // No transparency by default.
static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
static void ClearPicture(WebPPicture* const picture, uint32_t color) {
int x, y;
for (y = 0; y < picture->height; ++y) {
uint32_t* const dst = picture->argb + y * picture->argb_stride;
for (x = 0; x < picture->width; ++x) dst[x] = color;
}
}
static void Remap(const uint8_t* const src, const GifFileType* const gif,
uint32_t* dst, int len) {
int i;
const GifColorType* colors;
const ColorMapObject* const cmap =
gif->Image.ColorMap ? gif->Image.ColorMap : gif->SColorMap;
if (cmap == NULL) return;
colors = cmap->Colors;
for (i = 0; i < len; ++i) {
const GifColorType c = colors[src[i]];
dst[i] = (src[i] == transparent_index) ? TRANSPARENT_COLOR
: c.Blue | (c.Green << 8) | (c.Red << 16) | (0xff << 24);
}
}
static int ReadSubImage(GifFileType* gif, WebPPicture* pic, WebPPicture* view) {
const GifImageDesc image_desc = gif->Image;
const int offset_x = image_desc.Left;
const int offset_y = image_desc.Top;
const int sub_w = image_desc.Width;
const int sub_h = image_desc.Height;
uint32_t* dst = NULL;
uint8_t* tmp = NULL;
int ok = 0;
// Use a view for the sub-picture:
if (!WebPPictureView(pic, offset_x, offset_y, sub_w, sub_h, view)) {
fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n",
sub_w, sub_h, offset_x, offset_y);
goto End;
}
dst = view->argb;
tmp = (uint8_t*)malloc(sub_w * sizeof(*tmp));
if (tmp == NULL) goto End;
if (image_desc.Interlace) { // Interlaced image.
// We need 4 passes, with the following offsets and jumps.
const int interlace_offsets[] = { 0, 4, 2, 1 };
const int interlace_jumps[] = { 8, 8, 4, 2 };
int pass;
for (pass = 0; pass < 4; ++pass) {
int y;
for (y = interlace_offsets[pass]; y < sub_h; y += interlace_jumps[pass]) {
if (DGifGetLine(gif, tmp, sub_w) == GIF_ERROR) goto End;
Remap(tmp, gif, dst + y * view->argb_stride, sub_w);
}
}
} else { // Non-interlaced image.
int y;
for (y = 0; y < sub_h; ++y) {
if (DGifGetLine(gif, tmp, sub_w) == GIF_ERROR) goto End;
Remap(tmp, gif, dst + y * view->argb_stride, sub_w);
}
}
// re-align the view with even offset (and adjust dimensions if needed).
WebPPictureView(pic, offset_x & ~1, offset_y & ~1,
sub_w + (offset_x & 1), sub_h + (offset_y & 1), view);
ok = 1;
End:
free(tmp);
return ok;
}
static int GetBackgroundColor(const ColorMapObject* const color_map,
GifWord bgcolor_idx, uint32_t* const bgcolor) {
if (transparent_index != -1 && bgcolor_idx == transparent_index) {
*bgcolor = TRANSPARENT_COLOR; // Special case.
return 1;
} else if (color_map == NULL || color_map->Colors == NULL
|| bgcolor_idx >= color_map->ColorCount) {
return 0; // Invalid color map or index.
} else {
const GifColorType color = color_map->Colors[bgcolor_idx];
*bgcolor = (0xff << 24)
| (color.Red << 16)
| (color.Green << 8)
| (color.Blue << 0);
return 1;
}
}
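// Worked example (illustrative): for a palette entry {Red=0x12, Green=0x34,
// Blue=0x56}, the packing above gives *bgcolor = 0xff123456, i.e. alpha 0xff
// in the top byte followed by red, green and blue.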
static void DisplayGifError(const GifFileType* const gif, int gif_error) {
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR) && \
((GIFLIB_MAJOR == 4 && GIFLIB_MINOR >= 2) || GIFLIB_MAJOR > 4)
#if GIFLIB_MAJOR >= 5
// Static string actually, hence the const char* cast.
const char* error_str = (const char*)GifErrorString(
(gif == NULL) ? gif_error : gif->Error);
#else
const char* error_str = (const char*)GifErrorString();
(void)gif;
#endif
if (error_str == NULL) error_str = "Unknown error";
fprintf(stderr, "GIFLib Error %d: %s\n", gif_error, error_str);
#else
(void)gif;
fprintf(stderr, "GIFLib Error %d: ", gif_error);
PrintGifError();
fprintf(stderr, "\n");
#endif
}
static const char* const kErrorMessages[] = {
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
};
@ -43,41 +161,21 @@ static const char* ErrorString(WebPMuxError err) {
return kErrorMessages[-err];
}
enum {
METADATA_ICC = (1 << 0),
METADATA_XMP = (1 << 1),
METADATA_ALL = METADATA_ICC | METADATA_XMP
};
//------------------------------------------------------------------------------
static void Help(void) {
printf("Usage:\n");
printf(" gif2webp [options] gif_file -o webp_file\n");
printf("Options:\n");
printf("options:\n");
printf(" -h / -help ............ this help\n");
printf(" -lossy ................. encode image using lossy compression\n");
printf(" -mixed ................. for each frame in the image, pick lossy\n"
" or lossless compression heuristically\n");
printf(" -lossy ................. Encode image using lossy compression.\n");
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest)\n");
printf(" -min_size .............. minimize output size (default:off)\n"
" lossless compression by default; can be\n"
" combined with -q, -m, -lossy or -mixed\n"
" options\n");
printf(" -kmin <int> ............ min distance between key frames\n");
printf(" -kmax <int> ............ max distance between key frames\n");
printf(" -f <int> ............... filter strength (0=off..100)\n");
printf(" -metadata <string> ..... comma separated list of metadata to\n");
printf(" ");
printf("copy from the input to the output if present\n");
printf(" "
"Valid values: all, none, icc, xmp (default)\n");
printf(" -mt .................... use multi-threading if available\n");
printf("\n");
printf(" -version ............... print version number and exit\n");
printf(" -v ..................... verbose\n");
printf(" -quiet ................. don't print anything\n");
printf(" -version ............... print version number and exit.\n");
printf(" -v ..................... verbose.\n");
printf(" -quiet ................. don't print anything.\n");
printf("\n");
}
@ -91,57 +189,36 @@ int main(int argc, const char *argv[]) {
const char *in_file = NULL, *out_file = NULL;
FILE* out = NULL;
GifFileType* gif = NULL;
int frame_duration = 0;
int frame_timestamp = 0;
GIFDisposeMethod orig_dispose = GIF_DISPOSE_NONE;
WebPPicture picture;
WebPMuxFrameInfo frame;
WebPMuxAnimParams anim = { WHITE_COLOR, 0 };
WebPPicture frame; // Frame rectangle only (not disposed).
WebPPicture curr_canvas; // Not disposed.
WebPPicture prev_canvas; // Disposed.
WebPPicture prev_to_prev_canvas; // Disposed.
WebPAnimEncoder* enc = NULL;
WebPAnimEncoderOptions enc_options;
WebPConfig config;
int is_first_frame = 1; // Whether we are processing the first frame.
int is_first_frame = 1;
int done;
int c;
int quiet = 0;
WebPData webp_data;
int keep_metadata = METADATA_XMP; // ICC not output by default.
WebPData icc_data;
int stored_icc = 0; // Whether we have already stored an ICC profile.
WebPData xmp_data;
int stored_xmp = 0; // Whether we have already stored an XMP profile.
int loop_count = 0;
int stored_loop_count = 0; // Whether we have found an explicit loop count.
WebPConfig config;
WebPMux* mux = NULL;
WebPData webp_data = { NULL, 0 };
int stored_icc = 0; // Whether we have already stored an ICC profile.
int stored_xmp = 0;
int default_kmin = 1; // Whether to use default kmin value.
int default_kmax = 1;
memset(&frame, 0, sizeof(frame));
frame.id = WEBP_CHUNK_ANMF;
frame.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
if (!WebPConfigInit(&config) || !WebPAnimEncoderOptionsInit(&enc_options) ||
!WebPPictureInit(&frame) || !WebPPictureInit(&curr_canvas) ||
!WebPPictureInit(&prev_canvas) ||
!WebPPictureInit(&prev_to_prev_canvas)) {
if (!WebPConfigInit(&config) || !WebPPictureInit(&picture)) {
fprintf(stderr, "Error! Version mismatch!\n");
return -1;
}
config.lossless = 1; // Use lossless compression by default.
WebPDataInit(&webp_data);
WebPDataInit(&icc_data);
WebPDataInit(&xmp_data);
if (argc == 1) {
Help();
return 0;
}
for (c = 1; c < argc; ++c) {
int parse_error = 0;
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
Help();
return 0;
@ -149,64 +226,12 @@ int main(int argc, const char *argv[]) {
out_file = argv[++c];
} else if (!strcmp(argv[c], "-lossy")) {
config.lossless = 0;
} else if (!strcmp(argv[c], "-mixed")) {
enc_options.allow_mixed = 1;
config.lossless = 0;
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
config.quality = (float)strtod(argv[++c], NULL);
} else if (!strcmp(argv[c], "-m") && c < argc - 1) {
config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-min_size")) {
enc_options.minimize_size = 1;
} else if (!strcmp(argv[c], "-kmax") && c < argc - 1) {
enc_options.kmax = ExUtilGetInt(argv[++c], 0, &parse_error);
default_kmax = 0;
} else if (!strcmp(argv[c], "-kmin") && c < argc - 1) {
enc_options.kmin = ExUtilGetInt(argv[++c], 0, &parse_error);
default_kmin = 0;
config.method = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-f") && c < argc - 1) {
config.filter_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-metadata") && c < argc - 1) {
static const struct {
const char* option;
int flag;
} kTokens[] = {
{ "all", METADATA_ALL },
{ "none", 0 },
{ "icc", METADATA_ICC },
{ "xmp", METADATA_XMP },
};
const size_t kNumTokens = sizeof(kTokens) / sizeof(*kTokens);
const char* start = argv[++c];
const char* const end = start + strlen(start);
keep_metadata = 0;
while (start < end) {
size_t i;
const char* token = strchr(start, ',');
if (token == NULL) token = end;
for (i = 0; i < kNumTokens; ++i) {
if ((size_t)(token - start) == strlen(kTokens[i].option) &&
!strncmp(start, kTokens[i].option, strlen(kTokens[i].option))) {
if (kTokens[i].flag != 0) {
keep_metadata |= kTokens[i].flag;
} else {
keep_metadata = 0;
}
break;
}
}
if (i == kNumTokens) {
fprintf(stderr, "Error! Unknown metadata type '%.*s'\n",
(int)(token - start), start);
Help();
return -1;
}
start = token + 1;
}
} else if (!strcmp(argv[c], "-mt")) {
++config.thread_level;
config.filter_strength = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-version")) {
const int enc_version = WebPGetEncoderVersion();
const int mux_version = WebPGetMuxVersion();
@ -219,10 +244,6 @@ int main(int argc, const char *argv[]) {
quiet = 1;
} else if (!strcmp(argv[c], "-v")) {
verbose = 1;
enc_options.verbose = 1;
} else if (!strcmp(argv[c], "--")) {
if (c < argc - 1) in_file = argv[++c];
break;
} else if (argv[c][0] == '-') {
fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
Help();
@ -230,21 +251,7 @@ int main(int argc, const char *argv[]) {
} else {
in_file = argv[c];
}
if (parse_error) {
Help();
return -1;
}
}
// Appropriate default kmin, kmax values for lossy and lossless.
if (default_kmin) {
enc_options.kmin = config.lossless ? 9 : 3;
}
if (default_kmax) {
enc_options.kmax = config.lossless ? 17 : 5;
}
if (!WebPValidateConfig(&config)) {
fprintf(stderr, "Error! Invalid configuration.\n");
goto End;
@ -257,13 +264,26 @@ int main(int argc, const char *argv[]) {
}
// Start the decoder object
#if LOCAL_GIF_PREREQ(5,0)
#if defined(GIFLIB_MAJOR) && (GIFLIB_MAJOR >= 5)
// There was an API change in version 5.0.0.
gif = DGifOpenFileName(in_file, &gif_error);
#else
gif = DGifOpenFileName(in_file);
#endif
if (gif == NULL) goto End;
// Allocate picture buffer
picture.width = gif->SWidth;
picture.height = gif->SHeight;
picture.use_argb = 1;
if (!WebPPictureAlloc(&picture)) goto End;
mux = WebPMuxNew();
if (mux == NULL) {
fprintf(stderr, "ERROR: could not create a mux object.\n");
goto End;
}
// Loop over GIF images
done = 0;
do {
@ -272,88 +292,59 @@ int main(int argc, const char *argv[]) {
switch (type) {
case IMAGE_DESC_RECORD_TYPE: {
GIFFrameRect gif_rect;
GifImageDesc* const image_desc = &gif->Image;
WebPPicture sub_image;
WebPMemoryWriter memory;
if (frame.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
ClearPicture(&picture, anim.bgcolor);
}
if (!DGifGetImageDesc(gif)) goto End;
if (!ReadSubImage(gif, &picture, &sub_image)) goto End;
if (is_first_frame) {
if (verbose) {
printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
}
// Fix some broken GIF global headers that report
// 0 x 0 screen dimension.
if (gif->SWidth == 0 || gif->SHeight == 0) {
image_desc->Left = 0;
image_desc->Top = 0;
gif->SWidth = image_desc->Width;
gif->SHeight = image_desc->Height;
if (gif->SWidth <= 0 || gif->SHeight <= 0) {
goto End;
}
if (verbose) {
printf("Fixed canvas screen dimension to: %d x %d\n",
gif->SWidth, gif->SHeight);
}
}
// Allocate current buffer.
frame.width = gif->SWidth;
frame.height = gif->SHeight;
frame.use_argb = 1;
if (!WebPPictureAlloc(&frame)) goto End;
GIFClearPic(&frame, NULL);
WebPPictureCopy(&frame, &curr_canvas);
WebPPictureCopy(&frame, &prev_canvas);
WebPPictureCopy(&frame, &prev_to_prev_canvas);
// Background color.
GIFGetBackgroundColor(gif->SColorMap, gif->SBackGroundColor,
transparent_index,
&enc_options.anim_params.bgcolor);
// Initialize encoder.
enc = WebPAnimEncoderNew(curr_canvas.width, curr_canvas.height,
&enc_options);
if (enc == NULL) {
fprintf(stderr,
"Error! Could not create encoder object. Possibly due to "
"a memory error.\n");
goto End;
}
is_first_frame = 0;
if (!config.lossless) {
// We need to call BGRA variant because of the way we do Remap(). Note
// that 'sub_image' will no longer be a view and own some memory.
WebPPictureImportBGRA(
&sub_image, (uint8_t*)sub_image.argb,
sub_image.argb_stride * sizeof(*sub_image.argb));
sub_image.use_argb = 0;
} else {
sub_image.use_argb = 1;
}
// Some even more broken GIF can have sub-rect with zero width/height.
if (image_desc->Width == 0 || image_desc->Height == 0) {
image_desc->Width = gif->SWidth;
image_desc->Height = gif->SHeight;
}
if (!GIFReadFrame(gif, transparent_index, &gif_rect, &frame)) {
sub_image.writer = WebPMemoryWrite;
sub_image.custom_ptr = &memory;
WebPMemoryWriterInit(&memory);
if (!WebPEncode(&config, &sub_image)) {
fprintf(stderr, "Error! Cannot encode picture as WebP\n");
fprintf(stderr, "Error code: %d\n", sub_image.error_code);
goto End;
}
// Blend frame rectangle with previous canvas to compose full canvas.
// Note that 'curr_canvas' is same as 'prev_canvas' at this point.
GIFBlendFrames(&frame, &gif_rect, &curr_canvas);
if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
// Now we have all the info about the frame, as a Graphic Control
// Extension Block always appears before the Image Descriptor Block.
// So add the frame to mux.
frame.x_offset = gif->Image.Left & ~1;
frame.y_offset = gif->Image.Top & ~1;
frame.bitstream.bytes = memory.mem;
frame.bitstream.size = memory.size;
err = WebPMuxPushFrame(mux, &frame, 1);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not add animation frame.\n",
ErrorString(err));
goto End;
}
// Update canvases.
GIFCopyPixels(&prev_canvas, &prev_to_prev_canvas);
GIFDisposeFrame(orig_dispose, &gif_rect, &prev_canvas, &curr_canvas);
GIFCopyPixels(&curr_canvas, &prev_canvas);
// Update timestamp (for next frame).
frame_timestamp += frame_duration;
// In GIF, graphic control extensions are optional for a frame, so we
// may not get one before reading the next frame. To handle this case,
// we reset frame properties to reasonable defaults for the next frame.
orig_dispose = GIF_DISPOSE_NONE;
frame_duration = 0;
transparent_index = GIF_INDEX_INVALID;
if (verbose) {
printf("Added frame %dx%d (offset:%d,%d duration:%d) ",
sub_image.width, sub_image.height,
frame.x_offset, frame.y_offset,
frame.duration);
printf("dispose:%d transparent index:%d\n",
frame.dispose_method, transparent_index);
}
WebPDataClear(&frame.bitstream);
WebPPictureFree(&sub_image);
break;
}
case EXTENSION_RECORD_TYPE: {
@ -367,9 +358,30 @@ int main(int argc, const char *argv[]) {
break; // Do nothing for now.
}
case GRAPHICS_EXT_FUNC_CODE: {
if (!GIFReadGraphicsExtension(data, &frame_duration, &orig_dispose,
&transparent_index)) {
goto End;
const int flags = data[1];
const int dispose = (flags >> GIF_DISPOSE_SHIFT) & GIF_DISPOSE_MASK;
const int delay = data[2] | (data[3] << 8); // In 10 ms units.
if (data[0] != 4) goto End;
frame.duration = delay * 10; // Duration is in 1 ms units for WebP.
if (dispose == 3) {
fprintf(stderr, "WARNING: GIF_DISPOSE_RESTORE not supported.");
// failsafe. TODO(urvang): emulate the correct behaviour by
// recoding the whole frame.
frame.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
} else {
frame.dispose_method =
(dispose == 2) ? WEBP_MUX_DISPOSE_BACKGROUND
: WEBP_MUX_DISPOSE_NONE;
}
transparent_index = (flags & GIF_TRANSPARENT_MASK) ? data[4] : -1;
if (is_first_frame) {
if (!GetBackgroundColor(gif->SColorMap, gif->SBackGroundColor,
&anim.bgcolor)) {
fprintf(stderr, "GIF decode warning: invalid background color "
"index. Assuming white background.\n");
}
ClearPicture(&picture, anim.bgcolor);
is_first_frame = 0;
}
break;
}
@ -378,27 +390,69 @@ int main(int argc, const char *argv[]) {
}
case APPLICATION_EXT_FUNC_CODE: {
if (data[0] != 11) break; // Chunk is too short
if (!memcmp(data + 1, "NETSCAPE2.0", 11) ||
!memcmp(data + 1, "ANIMEXTS1.0", 11)) {
if (!GIFReadLoopCount(gif, &data, &loop_count)) {
goto End;
}
if (verbose) {
fprintf(stderr, "Loop count: %d\n", loop_count);
}
stored_loop_count = (loop_count != 0);
if (!memcmp(data + 1, "NETSCAPE2.0", 11)) {
// Recognize and parse Netscape2.0 NAB extension for loop count.
if (DGifGetExtensionNext(gif, &data) == GIF_ERROR) goto End;
if (data == NULL) goto End; // Loop count sub-block missing.
if (data[0] != 3 && data[1] != 1) break; // wrong size/marker
anim.loop_count = data[2] | (data[3] << 8);
if (verbose) printf("Loop count: %d\n", anim.loop_count);
} else { // An extension containing metadata.
// We only store the first encountered chunk of each type, and
// only if requested by the user.
const int is_xmp = (keep_metadata & METADATA_XMP) &&
!stored_xmp &&
!memcmp(data + 1, "XMP DataXMP", 11);
const int is_icc = (keep_metadata & METADATA_ICC) &&
!stored_icc &&
!memcmp(data + 1, "ICCRGBG1012", 11);
// We only store the first encountered chunk of each type.
const int is_xmp =
!stored_xmp && !memcmp(data + 1, "XMP DataXMP", 11);
const int is_icc =
!stored_icc && !memcmp(data + 1, "ICCRGBG1012", 11);
if (is_xmp || is_icc) {
if (!GIFReadMetadata(gif, &data,
is_xmp ? &xmp_data : &icc_data)) {
const char* const fourccs[2] = { "XMP " , "ICCP" };
const char* const features[2] = { "XMP" , "ICC" };
WebPData metadata = { NULL, 0 };
// Construct metadata from sub-blocks.
// Usual case (including ICC profile): In each sub-block, the
// first byte specifies its size in bytes (0 to 255) and the
// rest of the bytes contain the data.
// Special case for XMP data: In each sub-block, the first byte
// is also part of the XMP payload. XMP in GIF also has a 257
// byte padding data. See the XMP specification for details.
while (1) {
WebPData prev_metadata = metadata;
WebPData subblock;
if (DGifGetExtensionNext(gif, &data) == GIF_ERROR) {
WebPDataClear(&metadata);
goto End;
}
if (data == NULL) break; // Finished.
subblock.size = is_xmp ? data[0] + 1 : data[0];
assert(subblock.size > 0);
subblock.bytes = is_xmp ? data : data + 1;
metadata.bytes =
(uint8_t*)realloc((void*)metadata.bytes,
prev_metadata.size + subblock.size);
if (metadata.bytes == NULL) {
WebPDataClear(&prev_metadata);
goto End;
}
metadata.size += subblock.size;
memcpy((void*)(metadata.bytes + prev_metadata.size),
subblock.bytes, subblock.size);
}
if (is_xmp) {
// XMP padding data is 0x01, 0xff, 0xfe ... 0x01, 0x00.
const size_t xmp_pading_size = 257;
if (metadata.size > xmp_pading_size) {
metadata.size -= xmp_pading_size;
}
}
// Add metadata chunk.
err = WebPMuxSetChunk(mux, fourccs[is_icc], &metadata, 1);
if (verbose) {
printf("%s size: %d\n", features[is_icc], (int)metadata.size);
}
WebPDataClear(&metadata);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not set %s chunk.\n",
ErrorString(err), features[is_icc]);
goto End;
}
if (is_icc) {
@ -432,88 +486,30 @@ int main(int argc, const char *argv[]) {
}
} while (!done);
// Last NULL frame.
if (!WebPAnimEncoderAdd(enc, NULL, frame_timestamp, NULL)) {
fprintf(stderr, "Error flushing WebP muxer.\n");
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
}
if (!WebPAnimEncoderAssemble(enc, &webp_data)) {
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
// Finish muxing
err = WebPMuxSetAnimationParams(mux, &anim);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not set animation parameters.\n",
ErrorString(err));
goto End;
}
if (stored_loop_count || stored_icc || stored_xmp) {
// Re-mux to add loop count and/or metadata as needed.
mux = WebPMuxCreate(&webp_data, 1);
if (mux == NULL) {
fprintf(stderr, "ERROR: Could not re-mux to add loop count/metadata.\n");
goto End;
}
WebPDataClear(&webp_data);
if (stored_loop_count) { // Update loop count.
WebPMuxAnimParams new_params;
err = WebPMuxGetAnimationParams(mux, &new_params);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not fetch loop count.\n",
ErrorString(err));
goto End;
}
new_params.loop_count = loop_count;
err = WebPMuxSetAnimationParams(mux, &new_params);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not update loop count.\n",
ErrorString(err));
goto End;
}
}
if (stored_icc) { // Add ICCP chunk.
err = WebPMuxSetChunk(mux, "ICCP", &icc_data, 1);
if (verbose) {
fprintf(stderr, "ICC size: %d\n", (int)icc_data.size);
}
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not set ICC chunk.\n",
ErrorString(err));
goto End;
}
}
if (stored_xmp) { // Add XMP chunk.
err = WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1);
if (verbose) {
fprintf(stderr, "XMP size: %d\n", (int)xmp_data.size);
}
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not set XMP chunk.\n",
ErrorString(err));
goto End;
}
}
err = WebPMuxAssemble(mux, &webp_data);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not assemble when re-muxing to add "
"loop count/metadata.\n", ErrorString(err));
goto End;
}
err = WebPMuxAssemble(mux, &webp_data);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s) assembling the WebP file.\n", ErrorString(err));
goto End;
}
if (out_file != NULL) {
if (!ExUtilWriteFile(out_file, webp_data.bytes, webp_data.size)) {
fprintf(stderr, "Error writing output file: %s\n", out_file);
goto End;
}
if (!quiet) {
fprintf(stderr, "Saved output file (%d bytes): %s\n",
(int)webp_data.size, out_file);
printf("Saved output file: %s\n", out_file);
}
} else {
if (!quiet) {
fprintf(stderr, "Nothing written; use -o flag to save the result "
"(%d bytes).\n", (int)webp_data.size);
printf("Nothing written; use -o flag to save the result.\n");
}
}
@ -522,39 +518,19 @@ int main(int argc, const char *argv[]) {
gif_error = GIF_OK;
End:
WebPDataClear(&icc_data);
WebPDataClear(&xmp_data);
WebPMuxDelete(mux);
WebPDataClear(&webp_data);
WebPPictureFree(&frame);
WebPPictureFree(&curr_canvas);
WebPPictureFree(&prev_canvas);
WebPPictureFree(&prev_to_prev_canvas);
WebPAnimEncoderDelete(enc);
WebPMuxDelete(mux);
WebPPictureFree(&picture);
if (out != NULL && out_file != NULL) fclose(out);
if (gif_error != GIF_OK) {
GIFDisplayError(gif, gif_error);
DisplayGifError(gif, gif_error);
}
if (gif != NULL) {
#if LOCAL_GIF_PREREQ(5,1)
DGifCloseFile(gif, &gif_error);
#else
DGifCloseFile(gif);
#endif
}
return !ok;
}
#else // !WEBP_HAVE_GIF
int main(int argc, const char *argv[]) {
fprintf(stderr, "GIF support not enabled in %s.\n", argv[0]);
(void)argc;
return 0;
}
#endif
//------------------------------------------------------------------------------

examples/gifdec.c
View File

@ -1,396 +0,0 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// GIF decode.
#include "./gifdec.h"
#include <stdio.h>
#ifdef WEBP_HAVE_GIF
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "webp/encode.h"
#include "webp/mux_types.h"
#define GIF_TRANSPARENT_COLOR 0x00ffffff
#define GIF_WHITE_COLOR 0xffffffff
#define GIF_TRANSPARENT_MASK 0x01
#define GIF_DISPOSE_MASK 0x07
#define GIF_DISPOSE_SHIFT 2
// from utils/utils.h
extern void WebPCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride,
int width, int height);
extern void WebPCopyPixels(const WebPPicture* const src,
WebPPicture* const dst);
void GIFGetBackgroundColor(const ColorMapObject* const color_map,
int bgcolor_index, int transparent_index,
uint32_t* const bgcolor) {
if (transparent_index != GIF_INDEX_INVALID &&
bgcolor_index == transparent_index) {
*bgcolor = GIF_TRANSPARENT_COLOR; // Special case.
} else if (color_map == NULL || color_map->Colors == NULL
|| bgcolor_index >= color_map->ColorCount) {
*bgcolor = GIF_WHITE_COLOR;
fprintf(stderr,
"GIF decode warning: invalid background color index. Assuming "
"white background.\n");
} else {
const GifColorType color = color_map->Colors[bgcolor_index];
*bgcolor = (0xff << 24)
| (color.Red << 16)
| (color.Green << 8)
| (color.Blue << 0);
}
}
int GIFReadGraphicsExtension(const GifByteType* const buf, int* const duration,
GIFDisposeMethod* const dispose,
int* const transparent_index) {
const int flags = buf[1];
const int dispose_raw = (flags >> GIF_DISPOSE_SHIFT) & GIF_DISPOSE_MASK;
const int duration_raw = buf[2] | (buf[3] << 8); // In 10 ms units.
if (buf[0] != 4) return 0;
*duration = duration_raw * 10; // Duration is in 1 ms units.
switch (dispose_raw) {
case 3:
*dispose = GIF_DISPOSE_RESTORE_PREVIOUS;
break;
case 2:
*dispose = GIF_DISPOSE_BACKGROUND;
break;
case 1:
case 0:
default:
*dispose = GIF_DISPOSE_NONE;
break;
}
*transparent_index =
(flags & GIF_TRANSPARENT_MASK) ? buf[4] : GIF_INDEX_INVALID;
return 1;
}
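// Worked example (illustrative): for a Graphic Control Extension payload
// buf = { 0x04, 0x09, 0x64, 0x00, 0x2a }:
//   flags = 0x09        -> transparency bit set, dispose_raw = (0x09 >> 2) & 7 = 2
//   duration_raw = 0x64 -> *duration = 100 * 10 = 1000 ms
//   result: *dispose = GIF_DISPOSE_BACKGROUND, *transparent_index = 0x2a (42).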
static void Remap(const GifFileType* const gif, const uint8_t* const src,
int len, int transparent_index, uint32_t* dst) {
int i;
const GifColorType* colors;
const ColorMapObject* const cmap =
gif->Image.ColorMap ? gif->Image.ColorMap : gif->SColorMap;
if (cmap == NULL) return;
colors = cmap->Colors;
for (i = 0; i < len; ++i) {
const GifColorType c = colors[src[i]];
dst[i] = (src[i] == transparent_index) ? GIF_TRANSPARENT_COLOR
: c.Blue | (c.Green << 8) | (c.Red << 16) | (0xff << 24);
}
}
int GIFReadFrame(GifFileType* const gif, int transparent_index,
GIFFrameRect* const gif_rect, WebPPicture* const picture) {
WebPPicture sub_image;
const GifImageDesc* const image_desc = &gif->Image;
uint32_t* dst = NULL;
uint8_t* tmp = NULL;
int ok = 0;
GIFFrameRect rect = {
image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height
};
*gif_rect = rect;
// Use a view for the sub-picture:
if (!WebPPictureView(picture, rect.x_offset, rect.y_offset,
rect.width, rect.height, &sub_image)) {
fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n",
rect.width, rect.height, rect.x_offset, rect.y_offset);
return 0;
}
dst = sub_image.argb;
tmp = (uint8_t*)malloc(rect.width * sizeof(*tmp));
if (tmp == NULL) goto End;
if (image_desc->Interlace) { // Interlaced image.
// We need 4 passes, with the following offsets and jumps.
const int interlace_offsets[] = { 0, 4, 2, 1 };
const int interlace_jumps[] = { 8, 8, 4, 2 };
int pass;
for (pass = 0; pass < 4; ++pass) {
int y;
for (y = interlace_offsets[pass]; y < rect.height;
y += interlace_jumps[pass]) {
if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
Remap(gif, tmp, rect.width, transparent_index,
dst + y * sub_image.argb_stride);
}
}
} else { // Non-interlaced image.
int y;
for (y = 0; y < rect.height; ++y) {
if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
Remap(gif, tmp, rect.width, transparent_index,
dst + y * sub_image.argb_stride);
}
}
ok = 1;
End:
if (!ok) picture->error_code = sub_image.error_code;
WebPPictureFree(&sub_image);
free(tmp);
return ok;
}
int GIFReadLoopCount(GifFileType* const gif, GifByteType** const buf,
int* const loop_count) {
assert(!memcmp(*buf + 1, "NETSCAPE2.0", 11) ||
!memcmp(*buf + 1, "ANIMEXTS1.0", 11));
if (DGifGetExtensionNext(gif, buf) == GIF_ERROR) {
return 0;
}
if (*buf == NULL) {
return 0; // Loop count sub-block missing.
}
if ((*buf)[0] < 3 || (*buf)[1] != 1) {
return 0; // wrong size/marker
}
*loop_count = (*buf)[2] | ((*buf)[3] << 8);
return 1;
}
int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
WebPData* const metadata) {
const int is_xmp = !memcmp(*buf + 1, "XMP DataXMP", 11);
const int is_icc = !memcmp(*buf + 1, "ICCRGBG1012", 11);
assert(is_xmp || is_icc);
(void)is_icc; // silence unused warning.
// Construct metadata from sub-blocks.
// Usual case (including ICC profile): In each sub-block, the
// first byte specifies its size in bytes (0 to 255) and the
// rest of the bytes contain the data.
// Special case for XMP data: In each sub-block, the first byte
// is also part of the XMP payload. XMP in GIF also has a 257
// byte padding data. See the XMP specification for details.
while (1) {
WebPData subblock;
const uint8_t* tmp;
if (DGifGetExtensionNext(gif, buf) == GIF_ERROR) {
return 0;
}
if (*buf == NULL) break; // Finished.
subblock.size = is_xmp ? (*buf)[0] + 1 : (*buf)[0];
assert(subblock.size > 0);
subblock.bytes = is_xmp ? *buf : *buf + 1;
// Note: We store returned value in 'tmp' first, to avoid
// leaking old memory in metadata->bytes on error.
tmp = (uint8_t*)realloc((void*)metadata->bytes,
metadata->size + subblock.size);
if (tmp == NULL) {
return 0;
}
memcpy((void*)(tmp + metadata->size),
subblock.bytes, subblock.size);
metadata->bytes = tmp;
metadata->size += subblock.size;
}
if (is_xmp) {
// XMP padding data is 0x01, 0xff, 0xfe ... 0x01, 0x00.
const size_t xmp_pading_size = 257;
if (metadata->size > xmp_pading_size) {
metadata->size -= xmp_pading_size;
}
}
return 1;
}
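// Worked example (illustrative): a sub-block { 0x0a, d0..d9 } contributes
// 10 bytes to an ICC payload (the length byte 0x0a is skipped) but 11 bytes
// to an XMP payload (the length byte is part of the data). For XMP, the
// trailing 257-byte magic trailer (0x01, 0xff, 0xfe, ..., 0x01, 0x00) is then
// trimmed from the accumulated payload, as done above.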
static void ClearRectangle(WebPPicture* const picture,
int left, int top, int width, int height) {
int j;
for (j = top; j < top + height; ++j) {
uint32_t* const dst = picture->argb + j * picture->argb_stride;
int i;
for (i = left; i < left + width; ++i) {
dst[i] = GIF_TRANSPARENT_COLOR;
}
}
}
void GIFClearPic(WebPPicture* const pic, const GIFFrameRect* const rect) {
if (rect != NULL) {
ClearRectangle(pic, rect->x_offset, rect->y_offset,
rect->width, rect->height);
} else {
ClearRectangle(pic, 0, 0, pic->width, pic->height);
}
}
void GIFCopyPixels(const WebPPicture* const src, WebPPicture* const dst) {
WebPCopyPixels(src, dst);
}
void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
const WebPPicture* const prev_canvas,
WebPPicture* const curr_canvas) {
assert(rect != NULL);
if (dispose == GIF_DISPOSE_BACKGROUND) {
GIFClearPic(curr_canvas, rect);
} else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
const int src_stride = prev_canvas->argb_stride;
const uint32_t* const src =
prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
const int dst_stride = curr_canvas->argb_stride;
uint32_t* const dst =
curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
assert(prev_canvas != NULL);
WebPCopyPlane((uint8_t*)src, 4 * src_stride, (uint8_t*)dst, 4 * dst_stride,
4 * rect->width, rect->height);
}
}
void GIFBlendFrames(const WebPPicture* const src,
const GIFFrameRect* const rect, WebPPicture* const dst) {
int j;
assert(src->width == dst->width && src->height == dst->height);
for (j = rect->y_offset; j < rect->y_offset + rect->height; ++j) {
int i;
for (i = rect->x_offset; i < rect->x_offset + rect->width; ++i) {
const uint32_t src_pixel = src->argb[j * src->argb_stride + i];
const int src_alpha = src_pixel >> 24;
if (src_alpha != 0) {
dst->argb[j * dst->argb_stride + i] = src_pixel;
}
}
}
}
void GIFDisplayError(const GifFileType* const gif, int gif_error) {
// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
#if LOCAL_GIF_PREREQ(4,2)
#if LOCAL_GIF_PREREQ(5,0)
// Static string actually, hence the const char* cast.
const char* error_str = (const char*)GifErrorString(
(gif == NULL) ? gif_error : gif->Error);
#else
const char* error_str = (const char*)GifErrorString();
(void)gif;
#endif
if (error_str == NULL) error_str = "Unknown error";
fprintf(stderr, "GIFLib Error %d: %s\n", gif_error, error_str);
#else
(void)gif;
fprintf(stderr, "GIFLib Error %d: ", gif_error);
PrintGifError();
fprintf(stderr, "\n");
#endif
}
#else // !WEBP_HAVE_GIF
static void ErrorGIFNotAvailable() {
fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
"package before building.\n");
}
void GIFGetBackgroundColor(const struct ColorMapObject* const color_map,
int bgcolor_index, int transparent_index,
uint32_t* const bgcolor) {
(void)color_map;
(void)bgcolor_index;
(void)transparent_index;
(void)bgcolor;
ErrorGIFNotAvailable();
}
int GIFReadGraphicsExtension(const GifByteType* const data, int* const duration,
GIFDisposeMethod* const dispose,
int* const transparent_index) {
(void)data;
(void)duration;
(void)dispose;
(void)transparent_index;
ErrorGIFNotAvailable();
return 0;
}
int GIFReadFrame(struct GifFileType* const gif, int transparent_index,
GIFFrameRect* const gif_rect,
struct WebPPicture* const picture) {
(void)gif;
(void)transparent_index;
(void)gif_rect;
(void)picture;
ErrorGIFNotAvailable();
return 0;
}
int GIFReadLoopCount(struct GifFileType* const gif, GifByteType** const buf,
int* const loop_count) {
(void)gif;
(void)buf;
(void)loop_count;
ErrorGIFNotAvailable();
return 0;
}
int GIFReadMetadata(struct GifFileType* const gif, GifByteType** const buf,
struct WebPData* const metadata) {
(void)gif;
(void)buf;
(void)metadata;
ErrorGIFNotAvailable();
return 0;
}
void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
const struct WebPPicture* const prev_canvas,
struct WebPPicture* const curr_canvas) {
(void)dispose;
(void)rect;
(void)prev_canvas;
(void)curr_canvas;
ErrorGIFNotAvailable();
}
void GIFBlendFrames(const struct WebPPicture* const src,
const GIFFrameRect* const rect,
struct WebPPicture* const dst) {
(void)src;
(void)rect;
(void)dst;
ErrorGIFNotAvailable();
}
void GIFDisplayError(const struct GifFileType* const gif, int gif_error) {
(void)gif;
(void)gif_error;
ErrorGIFNotAvailable();
}
void GIFClearPic(struct WebPPicture* const pic,
const GIFFrameRect* const rect) {
(void)pic;
(void)rect;
ErrorGIFNotAvailable();
}
void GIFCopyPixels(const struct WebPPicture* const src,
struct WebPPicture* const dst) {
(void)src;
(void)dst;
ErrorGIFNotAvailable();
}
#endif // WEBP_HAVE_GIF
// -----------------------------------------------------------------------------

examples/gifdec.h
View File

@ -1,116 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// GIF decode.
#ifndef WEBP_EXAMPLES_GIFDEC_H_
#define WEBP_EXAMPLES_GIFDEC_H_
#include <stdio.h>
#include "webp/types.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#endif
#ifdef WEBP_HAVE_GIF
#include <gif_lib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
# define LOCAL_GIF_PREREQ(maj, min) \
(LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GIF_VERSION 0
# define LOCAL_GIF_PREREQ(maj, min) 0
#endif
#define GIF_INDEX_INVALID (-1)
typedef enum GIFDisposeMethod {
GIF_DISPOSE_NONE,
GIF_DISPOSE_BACKGROUND,
GIF_DISPOSE_RESTORE_PREVIOUS
} GIFDisposeMethod;
typedef struct {
int x_offset, y_offset, width, height;
} GIFFrameRect;
struct WebPData;
struct WebPPicture;
#ifndef WEBP_HAVE_GIF
struct ColorMapObject;
struct GifFileType;
typedef unsigned char GifByteType;
#endif
// Given the index of background color and transparent color, returns the
// corresponding background color (in BGRA format) in 'bgcolor'.
void GIFGetBackgroundColor(const struct ColorMapObject* const color_map,
int bgcolor_index, int transparent_index,
uint32_t* const bgcolor);
// Parses the given graphics extension data to get frame duration (in 1ms
// units), dispose method and transparent color index.
// Returns true on success.
int GIFReadGraphicsExtension(const GifByteType* const buf, int* const duration,
GIFDisposeMethod* const dispose,
int* const transparent_index);
// Reads the next GIF frame from 'gif' into 'picture'. Also, returns the GIF
// frame dimensions and offsets in 'rect'.
// Returns true on success.
int GIFReadFrame(struct GifFileType* const gif, int transparent_index,
GIFFrameRect* const gif_rect,
struct WebPPicture* const picture);
// Parses loop count from the given Netscape extension data.
int GIFReadLoopCount(struct GifFileType* const gif, GifByteType** const buf,
int* const loop_count);
// Parses the given ICC or XMP extension data and stores it into 'metadata'.
// Returns true on success.
int GIFReadMetadata(struct GifFileType* const gif, GifByteType** const buf,
struct WebPData* const metadata);
// Dispose the pixels within 'rect' of 'curr_canvas' based on 'dispose' method
// and 'prev_canvas'.
void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
const struct WebPPicture* const prev_canvas,
struct WebPPicture* const curr_canvas);
// Given 'src' picture and its frame rectangle 'rect', blend it into 'dst'.
void GIFBlendFrames(const struct WebPPicture* const src,
const GIFFrameRect* const rect,
struct WebPPicture* const dst);
// Prints an error string based on 'gif_error'.
void GIFDisplayError(const struct GifFileType* const gif, int gif_error);
// In the given 'pic', clear the pixels in 'rect' to transparent color.
void GIFClearPic(struct WebPPicture* const pic, const GIFFrameRect* const rect);
// Copy pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are assumed
// to be already allocated.
void GIFCopyPixels(const struct WebPPicture* const src,
struct WebPPicture* const dst);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_GIFDEC_H_
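// Illustrative sketch (hedged): a simplified per-frame composition loop built
// on the helpers above, mirroring the gif2webp flow shown earlier in this
// diff. 'ProcessFrame' is a hypothetical wrapper; it assumes WEBP_HAVE_GIF,
// canvases already allocated to the full GIF screen size, and it omits the
// extra canvas gif2webp keeps for RESTORE_PREVIOUS disposal.
#include "./gifdec.h"
#include "webp/encode.h"

static int ProcessFrame(GifFileType* const gif, int transparent_index,
                        GIFDisposeMethod dispose, WebPPicture* const frame,
                        WebPPicture* const curr_canvas,
                        WebPPicture* const prev_canvas) {
  GIFFrameRect rect;
  if (!GIFReadFrame(gif, transparent_index, &rect, frame)) return 0;
  GIFBlendFrames(frame, &rect, curr_canvas);  // compose the full canvas
  // ... encode 'curr_canvas' here (e.g. WebPAnimEncoderAdd or WebPEncode) ...
  GIFDisposeFrame(dispose, &rect, prev_canvas, curr_canvas);
  GIFCopyPixels(curr_canvas, prev_canvas);    // becomes next frame's "previous"
  return 1;
}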

examples/jpegdec.c
View File

@ -12,20 +12,18 @@
#include "./jpegdec.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <stdio.h>
#ifdef WEBP_HAVE_JPEG
#include <jpeglib.h>
#include <jerror.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
// -----------------------------------------------------------------------------
@ -210,84 +208,34 @@ static void my_error_exit(j_common_ptr dinfo) {
longjmp(myerr->setjmp_buffer, 1);
}
typedef struct {
struct jpeg_source_mgr pub;
const uint8_t* data;
size_t data_size;
} JPEGReadContext;
static void ContextInit(j_decompress_ptr cinfo) {
JPEGReadContext* const ctx = (JPEGReadContext*)cinfo->src;
ctx->pub.next_input_byte = ctx->data;
ctx->pub.bytes_in_buffer = ctx->data_size;
}
static int ContextFill(j_decompress_ptr cinfo) {
// we shouldn't get here.
ERREXIT(cinfo, JERR_FILE_READ);
return 0;
}
static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
JPEGReadContext* const ctx = (JPEGReadContext*)cinfo->src;
size_t jump = (size_t)jump_size;
if (jump > ctx->pub.bytes_in_buffer) { // Don't overflow the buffer.
jump = ctx->pub.bytes_in_buffer;
}
ctx->pub.bytes_in_buffer -= jump;
ctx->pub.next_input_byte += jump;
}
static void ContextTerm(j_decompress_ptr cinfo) {
(void)cinfo;
}
static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
JPEGReadContext* const ctx) {
cinfo->src = (struct jpeg_source_mgr*)ctx;
ctx->pub.init_source = ContextInit;
ctx->pub.fill_input_buffer = ContextFill;
ctx->pub.skip_input_data = ContextSkip;
ctx->pub.resync_to_restart = jpeg_resync_to_restart;
ctx->pub.term_source = ContextTerm;
ctx->pub.bytes_in_buffer = 0;
ctx->pub.next_input_byte = NULL;
}
int ReadJPEG(const uint8_t* const data, size_t data_size,
WebPPicture* const pic, Metadata* const metadata) {
volatile int ok = 0;
int ReadJPEG(FILE* in_file, WebPPicture* const pic, Metadata* const metadata) {
int ok = 0;
int stride, width, height;
volatile struct jpeg_decompress_struct dinfo;
struct jpeg_decompress_struct dinfo;
struct my_error_mgr jerr;
uint8_t* volatile rgb = NULL;
uint8_t* rgb = NULL;
JSAMPROW buffer[1];
JPEGReadContext ctx;
memset(&ctx, 0, sizeof(ctx));
ctx.data = data;
ctx.data_size = data_size;
memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo)); // for setjmp sanity
dinfo.err = jpeg_std_error(&jerr.pub);
jerr.pub.error_exit = my_error_exit;
if (setjmp(jerr.setjmp_buffer)) {
Error:
MetadataFree(metadata);
jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
jpeg_destroy_decompress(&dinfo);
goto End;
}
jpeg_create_decompress((j_decompress_ptr)&dinfo);
ContextSetup(&dinfo, &ctx);
if (metadata != NULL) SaveMetadataMarkers((j_decompress_ptr)&dinfo);
jpeg_read_header((j_decompress_ptr)&dinfo, TRUE);
jpeg_create_decompress(&dinfo);
jpeg_stdio_src(&dinfo, in_file);
if (metadata != NULL) SaveMetadataMarkers(&dinfo);
jpeg_read_header(&dinfo, TRUE);
dinfo.out_color_space = JCS_RGB;
dinfo.dct_method = JDCT_IFAST;
dinfo.do_fancy_upsampling = TRUE;
jpeg_start_decompress((j_decompress_ptr)&dinfo);
jpeg_start_decompress(&dinfo);
if (dinfo.output_components != 3) {
goto Error;
@ -304,22 +252,22 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
buffer[0] = (JSAMPLE*)rgb;
while (dinfo.output_scanline < dinfo.output_height) {
if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
if (jpeg_read_scanlines(&dinfo, buffer, 1) != 1) {
goto End;
}
buffer[0] += stride;
}
if (metadata != NULL) {
ok = ExtractMetadataFromJPEG((j_decompress_ptr)&dinfo, metadata);
ok = ExtractMetadataFromJPEG(&dinfo, metadata);
if (!ok) {
fprintf(stderr, "Error extracting JPEG metadata!\n");
goto Error;
}
}
jpeg_finish_decompress((j_decompress_ptr)&dinfo);
jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
jpeg_finish_decompress(&dinfo);
jpeg_destroy_decompress(&dinfo);
// WebP conversion.
pic->width = width;
@ -332,11 +280,9 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
return ok;
}
#else // !WEBP_HAVE_JPEG
int ReadJPEG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int ReadJPEG(FILE* in_file, struct WebPPicture* const pic,
struct Metadata* const metadata) {
(void)data;
(void)data_size;
(void)in_file;
(void)pic;
(void)metadata;
fprintf(stderr, "JPEG support not compiled. Please install the libjpeg "

examples/jpegdec.h
View File

@ -15,20 +15,20 @@
#include <stdio.h>
#include "webp/types.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct Metadata;
struct WebPPicture;
// Reads a JPEG from 'data', returning the decoded output in 'pic'.
// The output is RGB or YUV depending on pic->use_argb value.
// Reads a JPEG from 'in_file', returning the decoded output in 'pic'.
// The output is RGB.
// Returns true on success.
int ReadJPEG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic, struct Metadata* const metadata);
int ReadJPEG(FILE* in_file, struct WebPPicture* const pic,
struct Metadata* const metadata);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

examples/metadata.h
View File

@ -15,7 +15,7 @@
#include "webp/types.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -40,7 +40,7 @@ void MetadataFree(Metadata* const metadata);
int MetadataCopy(const char* metadata, size_t metadata_len,
MetadataPayload* const payload);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

examples/pngdec.c
View File

@ -12,24 +12,21 @@
#include "./pngdec.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <stdio.h>
#ifdef WEBP_HAVE_PNG
#include <assert.h>
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#include <stdlib.h>
#include <string.h>
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
static void PNGAPI error_function(png_structp png, png_const_charp error) {
if (error != NULL) fprintf(stderr, "libpng error: %s\n", error);
static void PNGAPI error_function(png_structp png, png_const_charp dummy) {
(void)dummy; // remove variable-unused warning
longjmp(png_jmpbuf(png), 1);
}
@ -133,8 +130,8 @@ static int ExtractMetadataFromPNG(png_structp png,
for (p = 0; p < 2; ++p) {
png_infop const info = (p == 0) ? head_info : end_info;
png_textp text = NULL;
const png_uint_32 num = png_get_text(png, info, &text, NULL);
png_uint_32 i;
const int num = png_get_text(png, info, &text, NULL);
int i;
// Look for EXIF / XMP metadata.
for (i = 0; i < num; ++i, ++text) {
int j;
@ -190,45 +187,30 @@ static int ExtractMetadataFromPNG(png_structp png,
return 1;
}
typedef struct {
const uint8_t* data;
size_t data_size;
png_size_t offset;
} PNGReadContext;
static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {
PNGReadContext* const ctx = (PNGReadContext*)png_get_io_ptr(png_ptr);
assert(ctx->offset + length <= ctx->data_size);
memcpy(data, ctx->data + ctx->offset, length);
ctx->offset += length;
}
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
volatile png_structp png = NULL;
volatile png_infop info = NULL;
volatile png_infop end_info = NULL;
PNGReadContext context = { NULL, 0, 0 };
int ReadPNG(FILE* in_file, WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
png_structp png;
png_infop info = NULL;
png_infop end_info = NULL;
int color_type, bit_depth, interlaced;
int has_alpha;
int num_passes;
int p;
volatile int ok = 0;
int ok = 0;
png_uint_32 width, height, y;
png_uint_32 stride;
uint8_t* volatile rgb = NULL;
context.data = data;
context.data_size = data_size;
int stride;
uint8_t* rgb = NULL;
png = png_create_read_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
if (png == NULL) goto End;
if (png == NULL) {
goto End;
}
png_set_error_fn(png, 0, error_function, NULL);
if (setjmp(png_jmpbuf(png))) {
Error:
MetadataFree(metadata);
png_destroy_read_struct(&png, &info, &end_info);
goto End;
}
@ -237,7 +219,7 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
end_info = png_create_info_struct(png);
if (end_info == NULL) goto Error;
png_set_read_fn(png, &context, ReadFunc);
png_init_io(png, in_file);
png_read_info(png, info);
if (!png_get_IHDR(png, info,
&width, &height, &bit_depth, &color_type, &interlaced,
@ -245,9 +227,7 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
png_set_strip_16(png);
png_set_packing(png);
if (color_type == PNG_COLOR_TYPE_PALETTE) {
png_set_palette_to_rgb(png);
}
if (color_type == PNG_COLOR_TYPE_PALETTE) png_set_palette_to_rgb(png);
if (color_type == PNG_COLOR_TYPE_GRAY ||
color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
if (bit_depth < 8) {
@ -274,7 +254,7 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
if (rgb == NULL) goto Error;
for (p = 0; p < num_passes; ++p) {
for (y = 0; y < height; ++y) {
png_bytep row = (png_bytep)(rgb + y * stride);
png_bytep row = rgb + y * stride;
png_read_rows(png, &row, NULL, 1);
}
}
@ -286,29 +266,25 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
goto Error;
}
pic->width = (int)width;
pic->height = (int)height;
ok = has_alpha ? WebPPictureImportRGBA(pic, rgb, (int)stride)
: WebPPictureImportRGB(pic, rgb, (int)stride);
png_destroy_read_struct(&png, &info, &end_info);
pic->width = width;
pic->height = height;
ok = has_alpha ? WebPPictureImportRGBA(pic, rgb, stride)
: WebPPictureImportRGB(pic, rgb, stride);
if (!ok) {
goto Error;
}
End:
if (png != NULL) {
png_destroy_read_struct((png_structpp)&png,
(png_infopp)&info, (png_infopp)&end_info);
}
free(rgb);
return ok;
}
#else // !WEBP_HAVE_PNG
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
(void)data;
(void)data_size;
int ReadPNG(FILE* in_file, struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
(void)in_file;
(void)pic;
(void)keep_alpha;
(void)metadata;

View File

@ -12,25 +12,23 @@
#ifndef WEBP_EXAMPLES_PNGDEC_H_
#define WEBP_EXAMPLES_PNGDEC_H_
#include "webp/types.h"
#include <stdio.h>
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct Metadata;
struct WebPPicture;
// Reads a PNG from 'data', returning the decoded output in 'pic'.
// Output is RGBA or YUVA, depending on pic->use_argb value.
// Reads a PNG from 'in_file', returning the decoded output in 'pic'.
// If 'keep_alpha' is true and the PNG has an alpha channel, the output is RGBA
// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
// otherwise it will be RGB.
// Returns true on success.
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
int ReadPNG(FILE* in_file, struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
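A minimal usage sketch for the buffer-based ReadPNG() declared above (which side of this diff it sits on determines whether it or the FILE*-based variant is current). It assumes the ExUtilReadFile() helper from example_util.h and the MetadataInit()/MetadataFree() helpers from metadata.h, as used by the other example programs; the main() is purely illustrative.
#include <stdio.h>
#include <stdlib.h>
#include "webp/encode.h"
#include "./example_util.h"   // ExUtilReadFile() (assumed available)
#include "./metadata.h"
#include "./pngdec.h"
int main(int argc, const char* argv[]) {
  const uint8_t* data = NULL;
  size_t data_size = 0;
  WebPPicture pic;
  Metadata metadata;
  int ok = 0;
  if (argc < 2) return 1;
  if (!WebPPictureInit(&pic)) return 1;     // library version check
  MetadataInit(&metadata);
  pic.use_argb = 1;                         // decode to ARGB rather than YUV
  // Slurp the whole PNG into memory, then hand the buffer to ReadPNG().
  if (ExUtilReadFile(argv[1], &data, &data_size)) {
    ok = ReadPNG(data, data_size, &pic, 1 /*keep_alpha*/, &metadata);
    if (ok) printf("PNG decoded: %d x %d\n", pic.width, pic.height);
  }
  free((void*)data);
  MetadataFree(&metadata);
  WebPPictureFree(&pic);
  return ok ? 0 : 1;
}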

View File

@ -14,17 +14,11 @@
#ifndef WEBP_EXAMPLES_STOPWATCH_H_
#define WEBP_EXAMPLES_STOPWATCH_H_
#include "webp/types.h"
#if defined _WIN32 && !defined __GNUC__
#include <windows.h>
typedef LARGE_INTEGER Stopwatch;
static WEBP_INLINE void StopwatchReset(Stopwatch* watch) {
QueryPerformanceCounter(watch);
}
static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
const LARGE_INTEGER old_value = *watch;
LARGE_INTEGER freq;
@ -39,23 +33,15 @@ static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
#else /* !_WIN32 */
#include <string.h> // memcpy
#include <sys/time.h>
typedef struct timeval Stopwatch;
static WEBP_INLINE void StopwatchReset(Stopwatch* watch) {
gettimeofday(watch, NULL);
}
static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
struct timeval old_value;
double delta_sec, delta_usec;
memcpy(&old_value, watch, sizeof(old_value));
const struct timeval old_value = *watch;
gettimeofday(watch, NULL);
delta_sec = (double)watch->tv_sec - old_value.tv_sec;
delta_usec = (double)watch->tv_usec - old_value.tv_usec;
return delta_sec + delta_usec / 1000000.0;
return watch->tv_sec - old_value.tv_sec +
(watch->tv_usec - old_value.tv_usec) / 1000000.0;
}
#endif /* _WIN32 */
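A short sketch of how this Stopwatch is meant to be used by the example tools: reset it, run the code being measured, then read the elapsed time in seconds (the read also restarts the timer).
#include <stdio.h>
#include "./stopwatch.h"
int main(void) {
  Stopwatch stop_watch;
  volatile double sink = 0.;
  double elapsed;
  int i;
  StopwatchReset(&stop_watch);                      // start the timer
  for (i = 0; i < 10000000; ++i) sink += i * 0.5;   // the work being timed
  elapsed = StopwatchReadAndReset(&stop_watch);     // seconds since the reset
  printf("loop: %.3f s (checksum %f)\n", elapsed, sink);
  return 0;
}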

View File

@ -12,11 +12,10 @@
#include "./tiffdec.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#ifdef WEBP_HAVE_TIFF
#include <tiffio.h>
@ -64,71 +63,17 @@ static int ExtractMetadataFromTIFF(TIFF* const tif, Metadata* const metadata) {
return 1;
}
// Ad-hoc structure to supply read-from-memory functionalities.
typedef struct {
const uint8_t* data;
toff_t size;
toff_t pos;
} MyData;
static int MyClose(thandle_t opaque) {
(void)opaque;
return 0;
}
static toff_t MySize(thandle_t opaque) {
const MyData* const my_data = (MyData*)opaque;
return my_data->size;
}
static toff_t MySeek(thandle_t opaque, toff_t offset, int whence) {
MyData* const my_data = (MyData*)opaque;
offset += (whence == SEEK_CUR) ? my_data->pos
: (whence == SEEK_SET) ? 0
: my_data->size;
if (offset > my_data->size) return (toff_t)-1;
my_data->pos = offset;
return offset;
}
static int MyMapFile(thandle_t opaque, void** base, toff_t* size) {
(void)opaque;
(void)base;
(void)size;
return 0;
}
static void MyUnmapFile(thandle_t opaque, void* base, toff_t size) {
(void)opaque;
(void)base;
(void)size;
}
static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {
MyData* const my_data = (MyData*)opaque;
if (my_data->pos + size > my_data->size) {
size = my_data->size - my_data->pos;
}
if (size > 0) {
memcpy(dst, my_data->data + my_data->pos, size);
my_data->pos += size;
}
return size;
}
int ReadTIFF(const uint8_t* const data, size_t data_size,
int ReadTIFF(const char* const filename,
WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
MyData my_data = { data, (toff_t)data_size, 0 };
TIFF* const tif = TIFFClientOpen("Memory", "r", &my_data,
MyRead, MyRead, MySeek, MyClose,
MySize, MyMapFile, MyUnmapFile);
TIFF* const tif = TIFFOpen(filename, "r");
uint32 width, height;
uint32* raster;
int ok = 0;
tdir_t dircount;
if (tif == NULL) {
fprintf(stderr, "Error! Cannot parse TIFF file\n");
fprintf(stderr, "Error! Cannot open TIFF file '%s'\n", filename);
return 0;
}
@ -142,7 +87,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
if (!(TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width) &&
TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height))) {
fprintf(stderr, "Error! Cannot retrieve TIFF image dimensions.\n");
goto End;
return 0;
}
raster = (uint32*)_TIFFmalloc(width * height * sizeof(*raster));
if (raster != NULL) {
@ -152,7 +97,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
pic->width = width;
pic->height = height;
// TIFF data is ABGR
#ifdef WORDS_BIGENDIAN
#ifdef __BIG_ENDIAN__
TIFFSwabArrayOfLong(raster, width * height);
#endif
ok = keep_alpha
@ -174,16 +119,15 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
}
}
}
End:
TIFFClose(tif);
return ok;
}
#else // !WEBP_HAVE_TIFF
int ReadTIFF(const uint8_t* const data, size_t data_size,
int ReadTIFF(const char* const filename,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
(void)data;
(void)data_size;
(void)filename;
(void)pic;
(void)keep_alpha;
(void)metadata;

View File

@ -12,25 +12,22 @@
#ifndef WEBP_EXAMPLES_TIFFDEC_H_
#define WEBP_EXAMPLES_TIFFDEC_H_
#include "webp/types.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct Metadata;
struct WebPPicture;
// Reads a TIFF from 'data', returning the decoded output in 'pic'.
// Output is RGBA or YUVA, depending on pic->use_argb value.
// Reads a TIFF from 'filename', returning the decoded output in 'pic'.
// If 'keep_alpha' is true and the TIFF has an alpha channel, the output is RGBA
// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
// otherwise it will be RGB.
// Returns true on success.
int ReadTIFF(const uint8_t* const data, size_t data_size,
int ReadTIFF(const char* const filename,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
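As a usage sketch, the buffer-based ReadTIFF() above slots into the same cwebp-style pipeline as the PNG reader; here the decoded picture is pushed straight through WebPEncode() to an in-memory writer. ExUtilReadFile() and the Metadata helpers are again assumed to come from the examples directory.
#include <stdio.h>
#include <stdlib.h>
#include "webp/encode.h"
#include "./example_util.h"   // ExUtilReadFile() (assumed available)
#include "./metadata.h"
#include "./tiffdec.h"
// Decodes a TIFF held in memory and re-encodes it as a WebP bitstream.
int main(int argc, const char* argv[]) {
  const uint8_t* data = NULL;
  size_t data_size = 0;
  WebPPicture pic;
  WebPConfig config;
  WebPMemoryWriter wrt;
  Metadata metadata;
  int ok = 0;
  if (argc < 2) return 1;
  if (!WebPPictureInit(&pic) || !WebPConfigInit(&config)) return 1;
  MetadataInit(&metadata);
  WebPMemoryWriterInit(&wrt);
  pic.use_argb = 1;
  pic.writer = WebPMemoryWrite;             // collect output in wrt.mem
  pic.custom_ptr = &wrt;
  if (ExUtilReadFile(argv[1], &data, &data_size) &&
      ReadTIFF(data, data_size, &pic, 1 /*keep_alpha*/, &metadata)) {
    ok = WebPEncode(&config, &pic);         // wrt.mem / wrt.size hold the result
    if (ok) printf("WebP output: %u bytes\n", (unsigned)wrt.size);
  }
  free((void*)data);
  free(wrt.mem);
  MetadataFree(&metadata);
  WebPPictureFree(&pic);
  return ok ? 0 : 1;
}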

View File

@ -11,15 +11,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(WEBP_HAVE_GL)
#if defined(HAVE_GLUT_GLUT_H)
#include <GLUT/glut.h>
#else
@ -38,10 +36,12 @@
#include "./example_util.h"
#if defined(_MSC_VER) && _MSC_VER < 1900
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
static void Help(void);
// Unfortunate global variables. Gathered into a struct for comfort.
static struct {
int has_animation;
@ -57,11 +57,15 @@ static struct {
const char* file_name;
WebPData data;
WebPDecoderConfig config;
WebPDecoderConfig* config;
const WebPDecBuffer* pic;
WebPDemuxer* dmux;
WebPIterator curr_frame;
WebPIterator prev_frame;
WebPIterator frameiter;
struct {
int width, height;
int x_offset, y_offset;
enum WebPMuxAnimDispose dispose_method;
} prev_frame;
WebPChunkIterator iccp;
} kParams;
@ -73,23 +77,12 @@ static void ClearPreviousPic(void) {
static void ClearParams(void) {
ClearPreviousPic();
WebPDataClear(&kParams.data);
WebPDemuxReleaseIterator(&kParams.curr_frame);
WebPDemuxReleaseIterator(&kParams.prev_frame);
WebPDemuxReleaseIterator(&kParams.frameiter);
WebPDemuxReleaseChunkIterator(&kParams.iccp);
WebPDemuxDelete(kParams.dmux);
kParams.dmux = NULL;
}
// Sets the previous frame to the dimensions of the canvas and has it dispose
// to background to cause the canvas to be cleared.
static void ClearPreviousFrame(void) {
WebPIterator* const prev = &kParams.prev_frame;
prev->width = kParams.canvas_width;
prev->height = kParams.canvas_height;
prev->x_offset = prev->y_offset = 0;
prev->dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
}
// -----------------------------------------------------------------------------
// Color profile handling
static int ApplyColorProfile(const WebPData* const profile,
@ -154,25 +147,25 @@ static int ApplyColorProfile(const WebPData* const profile,
//------------------------------------------------------------------------------
// File decoding
static int Decode(void) { // Fills kParams.curr_frame
const WebPIterator* const curr = &kParams.curr_frame;
WebPDecoderConfig* const config = &kParams.config;
static int Decode(void) { // Fills kParams.frameiter
const WebPIterator* const iter = &kParams.frameiter;
WebPDecoderConfig* const config = kParams.config;
WebPDecBuffer* const output_buffer = &config->output;
int ok = 0;
ClearPreviousPic();
output_buffer->colorspace = MODE_RGBA;
ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size,
ok = (WebPDecode(iter->fragment.bytes, iter->fragment.size,
config) == VP8_STATUS_OK);
if (!ok) {
fprintf(stderr, "Decoding of frame #%d failed!\n", curr->frame_num);
fprintf(stderr, "Decoding of frame #%d failed!\n", iter->frame_num);
} else {
kParams.pic = output_buffer;
if (kParams.use_color_profile) {
ok = ApplyColorProfile(&kParams.iccp.chunk, output_buffer);
if (!ok) {
fprintf(stderr, "Applying color profile to frame #%d failed!\n",
curr->frame_num);
iter->frame_num);
}
}
}
@ -183,21 +176,19 @@ static void decode_callback(int what) {
if (what == 0 && !kParams.done) {
int duration = 0;
if (kParams.dmux != NULL) {
WebPIterator* const curr = &kParams.curr_frame;
if (!WebPDemuxNextFrame(curr)) {
WebPDemuxReleaseIterator(curr);
if (WebPDemuxGetFrame(kParams.dmux, 1, curr)) {
WebPIterator* const iter = &kParams.frameiter;
if (!WebPDemuxNextFrame(iter)) {
WebPDemuxReleaseIterator(iter);
if (WebPDemuxGetFrame(kParams.dmux, 1, iter)) {
--kParams.loop_count;
kParams.done = (kParams.loop_count == 0);
if (kParams.done) return;
ClearPreviousFrame();
} else {
kParams.decoding_error = 1;
kParams.done = 1;
return;
}
}
duration = curr->duration;
duration = iter->duration;
}
if (!Decode()) {
kParams.decoding_error = 1;
@ -290,50 +281,40 @@ static void DrawCheckerBoard(void) {
static void HandleDisplay(void) {
const WebPDecBuffer* const pic = kParams.pic;
const WebPIterator* const curr = &kParams.curr_frame;
WebPIterator* const prev = &kParams.prev_frame;
const WebPIterator* const iter = &kParams.frameiter;
GLfloat xoff, yoff;
if (pic == NULL) return;
glPushMatrix();
glPixelZoom(1, -1);
xoff = (GLfloat)(2. * curr->x_offset / kParams.canvas_width);
yoff = (GLfloat)(2. * curr->y_offset / kParams.canvas_height);
xoff = (GLfloat)(2. * iter->x_offset / kParams.canvas_width);
yoff = (GLfloat)(2. * iter->y_offset / kParams.canvas_height);
glRasterPos2f(-1.f + xoff, 1.f - yoff);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pic->u.RGBA.stride / 4);
if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND ||
curr->blend_method == WEBP_MUX_NO_BLEND) {
if (kParams.prev_frame.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
// TODO(later): these offsets and those above should factor in window size.
// they will be incorrect if the window is resized.
// glScissor() takes window coordinates (0,0 at bottom left).
int window_x, window_y;
int frame_w, frame_h;
if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
// Clear the previous frame rectangle.
window_x = prev->x_offset;
window_y = kParams.canvas_height - prev->y_offset - prev->height;
frame_w = prev->width;
frame_h = prev->height;
} else { // curr->blend_method == WEBP_MUX_NO_BLEND.
// We simulate no-blending behavior by first clearing the current frame
// rectangle (to a checker-board) and then alpha-blending against it.
window_x = curr->x_offset;
window_y = kParams.canvas_height - curr->y_offset - curr->height;
frame_w = curr->width;
frame_h = curr->height;
}
const int window_x = kParams.prev_frame.x_offset;
const int window_y = kParams.canvas_height -
kParams.prev_frame.y_offset -
kParams.prev_frame.height;
glEnable(GL_SCISSOR_TEST);
// Only update the requested area, not the whole canvas.
glScissor(window_x, window_y, frame_w, frame_h);
// Only updated the requested area, not the whole canvas.
glScissor(window_x, window_y,
kParams.prev_frame.width, kParams.prev_frame.height);
glClear(GL_COLOR_BUFFER_BIT); // use clear color
DrawCheckerBoard();
glDisable(GL_SCISSOR_TEST);
}
*prev = *curr;
kParams.prev_frame.width = iter->width;
kParams.prev_frame.height = iter->height;
kParams.prev_frame.x_offset = iter->x_offset;
kParams.prev_frame.y_offset = iter->y_offset;
kParams.prev_frame.dispose_method = iter->dispose_method;
glDrawPixels(pic->width, pic->height,
GL_RGBA, GL_UNSIGNED_BYTE,
@ -349,9 +330,9 @@ static void HandleDisplay(void) {
glColor4f(0.90f, 0.0f, 0.90f, 1.0f);
glRasterPos2f(-0.95f, 0.80f);
PrintString(tmp);
if (curr->x_offset != 0 || curr->y_offset != 0) {
if (iter->x_offset != 0 || iter->y_offset != 0) {
snprintf(tmp, sizeof(tmp), " (offset:%d,%d)",
curr->x_offset, curr->y_offset);
iter->x_offset, iter->y_offset);
glRasterPos2f(-0.95f, 0.70f);
PrintString(tmp);
}
@ -387,52 +368,42 @@ static void Help(void) {
printf("Usage: vwebp in_file [options]\n\n"
"Decodes the WebP image file and visualize it using OpenGL\n"
"Options are:\n"
" -version .... print version number and exit\n"
" -noicc ....... don't use the icc profile if present\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
" -dither <int> dithering strength (0..100), default=50\n"
" -noalphadither disable alpha plane dithering\n"
" -mt .......... use multi-threading\n"
" -info ........ print info\n"
" -h ....... this help message\n"
" -version .... print version number and exit.\n"
" -noicc ....... don't use the icc profile if present.\n"
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
" -nofilter .... disable in-loop filtering.\n"
" -mt .......... use multi-threading.\n"
" -info ........ print info.\n"
" -h ....... this help message.\n"
"\n"
"Keyboard shortcuts:\n"
" 'c' ................ toggle use of color profile\n"
" 'i' ................ overlay file information\n"
" 'q' / 'Q' / ESC .... quit\n"
" 'c' ................ toggle use of color profile.\n"
" 'i' ................ overlay file information.\n"
" 'q' / 'Q' / ESC .... quit.\n"
);
}
int main(int argc, char *argv[]) {
WebPDecoderConfig config;
int c;
WebPDecoderConfig* const config = &kParams.config;
WebPIterator* const curr = &kParams.curr_frame;
if (!WebPInitDecoderConfig(config)) {
if (!WebPInitDecoderConfig(&config)) {
fprintf(stderr, "Library version mismatch!\n");
return -1;
}
config->options.dithering_strength = 50;
config->options.alpha_dithering_strength = 100;
kParams.config = &config;
kParams.use_color_profile = 1;
for (c = 1; c < argc; ++c) {
int parse_error = 0;
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
Help();
return 0;
} else if (!strcmp(argv[c], "-noicc")) {
kParams.use_color_profile = 0;
} else if (!strcmp(argv[c], "-nofancy")) {
config->options.no_fancy_upsampling = 1;
config.options.no_fancy_upsampling = 1;
} else if (!strcmp(argv[c], "-nofilter")) {
config->options.bypass_filtering = 1;
} else if (!strcmp(argv[c], "-noalphadither")) {
config->options.alpha_dithering_strength = 0;
} else if (!strcmp(argv[c], "-dither") && c + 1 < argc) {
config->options.dithering_strength =
ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.bypass_filtering = 1;
} else if (!strcmp(argv[c], "-info")) {
kParams.print_info = 1;
} else if (!strcmp(argv[c], "-version")) {
@ -444,10 +415,7 @@ int main(int argc, char *argv[]) {
(dmux_version >> 8) & 0xff, dmux_version & 0xff);
return 0;
} else if (!strcmp(argv[c], "-mt")) {
config->options.use_threads = 1;
} else if (!strcmp(argv[c], "--")) {
if (c < argc - 1) kParams.file_name = argv[++c];
break;
config.options.use_threads = 1;
} else if (argv[c][0] == '-') {
printf("Unknown option '%s'\n", argv[c]);
Help();
@ -455,11 +423,6 @@ int main(int argc, char *argv[]) {
} else {
kParams.file_name = argv[c];
}
if (parse_error) {
Help();
return -1;
}
}
if (kParams.file_name == NULL) {
@ -494,7 +457,10 @@ int main(int argc, char *argv[]) {
printf("Canvas: %d x %d\n", kParams.canvas_width, kParams.canvas_height);
}
ClearPreviousFrame();
kParams.prev_frame.width = kParams.canvas_width;
kParams.prev_frame.height = kParams.canvas_height;
kParams.prev_frame.x_offset = kParams.prev_frame.y_offset = 0;
kParams.prev_frame.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
memset(&kParams.iccp, 0, sizeof(kParams.iccp));
kParams.has_color_profile =
@ -510,28 +476,22 @@ int main(int argc, char *argv[]) {
#endif
}
if (!WebPDemuxGetFrame(kParams.dmux, 1, curr)) goto Error;
if (!WebPDemuxGetFrame(kParams.dmux, 1, &kParams.frameiter)) goto Error;
kParams.has_animation = (curr->num_frames > 1);
kParams.has_animation = (kParams.frameiter.num_frames > 1);
kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT);
kParams.bg_color = WebPDemuxGetI(kParams.dmux, WEBP_FF_BACKGROUND_COLOR);
printf("VP8X: Found %d images in file (loop count = %d)\n",
curr->num_frames, kParams.loop_count);
kParams.frameiter.num_frames, kParams.loop_count);
// Decode first frame
if (!Decode()) goto Error;
// Position iterator to last frame. Next call to HandleDisplay will wrap over.
// We take this into account by bumping up loop_count.
WebPDemuxGetFrame(kParams.dmux, 0, curr);
WebPDemuxGetFrame(kParams.dmux, 0, &kParams.frameiter);
if (kParams.loop_count) ++kParams.loop_count;
#if defined(__unix__) || defined(__CYGWIN__)
// Work around GLUT compositor bug.
// https://bugs.launchpad.net/ubuntu/+source/freeglut/+bug/369891
setenv("XLIB_SKIP_ARGB_VISUALS", "1", 1);
#endif
// Start display (and timer)
glutInit(&argc, argv);
#ifdef FREEGLUT
@ -551,14 +511,4 @@ int main(int argc, char *argv[]) {
return -1;
}
#else // !WEBP_HAVE_GL
int main(int argc, const char *argv[]) {
fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);
(void)argc;
return 0;
}
#endif
//------------------------------------------------------------------------------
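The demux calls above follow the usual libwebpdemux iteration pattern; stripped of the OpenGL display, timing and color-profile handling, the frame loop reduces to roughly the following (DumpFrames() is an illustrative name, and freeing the WebPDecodeRGBA() output with plain free() matches the convention of this era of the library).
#include <stdio.h>
#include <stdlib.h>
#include "webp/decode.h"
#include "webp/demux.h"
// Walks every frame of an animated WebP held in memory and decodes it to
// RGBA -- the same WebPDemuxGetFrame()/WebPDemuxNextFrame() flow used above.
static int DumpFrames(const uint8_t* data, size_t data_size) {
  WebPData webp_data;
  WebPDemuxer* dmux;
  WebPIterator iter;
  int ok = 1;
  webp_data.bytes = data;
  webp_data.size = data_size;
  dmux = WebPDemux(&webp_data);
  if (dmux == NULL) return 0;
  if (WebPDemuxGetFrame(dmux, 1, &iter)) {   // position on the first frame
    do {
      int w, h;
      uint8_t* const rgba = WebPDecodeRGBA(iter.fragment.bytes,
                                           iter.fragment.size, &w, &h);
      if (rgba == NULL) { ok = 0; break; }
      printf("frame %d: %dx%d at +%d+%d, %d ms\n", iter.frame_num,
             w, h, iter.x_offset, iter.y_offset, iter.duration);
      free(rgba);                            // release WebPDecodeRGBA() output
    } while (WebPDemuxNextFrame(&iter));
    WebPDemuxReleaseIterator(&iter);
  }
  WebPDemuxDelete(dmux);
  return ok;
}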

View File

@ -1,71 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP decode.
#include "./webpdec.h"
#include <stdio.h>
#include <stdlib.h>
#include "webp/decode.h"
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
int ReadWebP(const uint8_t* const data, size_t data_size,
WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
int ok = 0;
VP8StatusCode status = VP8_STATUS_OK;
WebPDecoderConfig config;
WebPDecBuffer* const output_buffer = &config.output;
WebPBitstreamFeatures* const bitstream = &config.input;
// TODO(jzern): add Exif/XMP/ICC extraction.
if (metadata != NULL) {
fprintf(stderr, "Warning: metadata extraction from WebP is unsupported.\n");
}
if (!WebPInitDecoderConfig(&config)) {
fprintf(stderr, "Library version mismatch!\n");
return 0;
}
status = WebPGetFeatures(data, data_size, bitstream);
if (status != VP8_STATUS_OK) {
ExUtilPrintWebPError("input data", status);
return 0;
}
{
const int has_alpha = keep_alpha && bitstream->has_alpha;
// TODO(skal): use MODE_YUV(A), depending on the expected
// input pic->use_argb. This would save some conversion steps.
output_buffer->colorspace = has_alpha ? MODE_RGBA : MODE_RGB;
status = ExUtilDecodeWebP(data, data_size, 0, &config);
if (status == VP8_STATUS_OK) {
const uint8_t* const rgba = output_buffer->u.RGBA.rgba;
const int stride = output_buffer->u.RGBA.stride;
pic->width = output_buffer->width;
pic->height = output_buffer->height;
ok = has_alpha ? WebPPictureImportRGBA(pic, rgba, stride)
: WebPPictureImportRGB(pic, rgba, stride);
}
}
if (status != VP8_STATUS_OK) {
ExUtilPrintWebPError("input data", status);
}
WebPFreeDecBuffer(output_buffer);
return ok;
}
// -----------------------------------------------------------------------------

View File

@ -1,37 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP decode.
#ifndef WEBP_EXAMPLES_WEBPDEC_H_
#define WEBP_EXAMPLES_WEBPDEC_H_
#include "webp/types.h"
#ifdef __cplusplus
extern "C" {
#endif
struct Metadata;
struct WebPPicture;
// Reads a WebP from 'in_file', returning the decoded output in 'pic'.
// Output is RGBA or YUVA, depending on pic->use_argb value.
// If 'keep_alpha' is true and the WebP has an alpha channel, the output is RGBA
// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
// Returns true on success.
int ReadWebP(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_WEBPDEC_H_
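Usage mirrors the PNG/TIFF readers; a brief sketch with an illustrative ImportWebP() wrapper, assuming the caller has already loaded the .webp file into memory. Passing a NULL Metadata pointer is accepted by the implementation shown above, which does not extract Exif/XMP/ICC anyway.
#include <stdio.h>
#include "webp/encode.h"
#include "./webpdec.h"
// Re-imports an in-memory WebP bitstream into a WebPPicture (RGB or RGBA),
// e.g. so that cwebp can re-compress an existing .webp input.
static int ImportWebP(const uint8_t* data, size_t data_size,
                      WebPPicture* const pic) {
  int ok;
  if (!WebPPictureInit(pic)) return 0;
  pic->use_argb = 1;                        // request ARGB storage
  ok = ReadWebP(data, data_size, pic, 1 /*keep_alpha*/, NULL /*metadata*/);
  if (!ok) fprintf(stderr, "WebP import failed\n");
  return ok;
}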

View File

@ -46,14 +46,13 @@
*/
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "webp/decode.h"
#include "webp/mux.h"
#include "./example_util.h"
@ -130,7 +129,7 @@ static int CountOccurrences(const char* arglist[], int list_length,
return num_occurences;
}
static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
static const char* const kErrorMessages[] = {
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
};
@ -146,6 +145,12 @@ static const char* ErrorString(WebPMuxError err) {
return err; \
}
#define RETURN_IF_ERROR2(ERR_MSG, FORMAT_STR) \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG, FORMAT_STR); \
return err; \
}
#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2) \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
@ -167,26 +172,23 @@ static const char* ErrorString(WebPMuxError err) {
} while (0)
#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL) \
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
ok = 0; \
goto LABEL; \
} while (0)
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
ok = 0; \
goto LABEL; \
} while (0)
static WebPMuxError DisplayInfo(const WebPMux* mux) {
int width, height;
uint32_t flag;
WebPMuxError err = WebPMuxGetCanvasSize(mux, &width, &height);
assert(err == WEBP_MUX_OK); // As WebPMuxCreate() was successful earlier.
printf("Canvas size: %d x %d\n", width, height);
err = WebPMuxGetFeatures(mux, &flag);
WebPMuxError err = WebPMuxGetFeatures(mux, &flag);
#ifndef WEBP_EXPERIMENTAL_FEATURES
if (flag & FRAGMENTS_FLAG) err = WEBP_MUX_INVALID_ARGUMENT;
#endif
RETURN_IF_ERROR("Failed to retrieve features\n");
if (flag == 0) {
printf("No features present.\n");
fprintf(stderr, "No features present.\n");
return err;
}
@ -209,40 +211,26 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
if (is_anim) {
WebPMuxAnimParams params;
err = WebPMuxGetAnimationParams(mux, &params);
assert(err == WEBP_MUX_OK);
RETURN_IF_ERROR("Failed to retrieve animation parameters\n");
printf("Background color : 0x%.8X Loop Count : %d\n",
params.bgcolor, params.loop_count);
}
err = WebPMuxNumChunks(mux, id, &nFrames);
assert(err == WEBP_MUX_OK);
RETURN_IF_ERROR2("Failed to retrieve number of %ss\n", type_str);
printf("Number of %ss: %d\n", type_str, nFrames);
if (nFrames > 0) {
int i;
printf("No.: width height alpha x_offset y_offset ");
if (is_anim) printf("duration dispose blend ");
printf("No.: x_offset y_offset ");
if (is_anim) printf("duration dispose ");
printf("image_size\n");
for (i = 1; i <= nFrames; i++) {
WebPMuxFrameInfo frame;
err = WebPMuxGetFrame(mux, i, &frame);
if (err == WEBP_MUX_OK) {
WebPBitstreamFeatures features;
const VP8StatusCode status = WebPGetFeatures(
frame.bitstream.bytes, frame.bitstream.size, &features);
assert(status == VP8_STATUS_OK); // Checked by WebPMuxCreate().
(void)status;
printf("%3d: %5d %5d %5s %8d %8d ", i, features.width,
features.height, features.has_alpha ? "yes" : "no",
frame.x_offset, frame.y_offset);
if (is_anim) {
const char* const dispose =
(frame.dispose_method == WEBP_MUX_DISPOSE_NONE) ? "none"
: "background";
const char* const blend =
(frame.blend_method == WEBP_MUX_BLEND) ? "yes" : "no";
printf("%8d %10s %5s ", frame.duration, dispose, blend);
}
printf("%3d: %8d %8d ", i, frame.x_offset, frame.y_offset);
if (is_anim) printf("%8d %7d ", frame.duration, frame.dispose_method);
printf("%10d\n", (int)frame.bitstream.size);
}
WebPDataClear(&frame.bitstream);
@ -254,21 +242,21 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
if (flag & ICCP_FLAG) {
WebPData icc_profile;
err = WebPMuxGetChunk(mux, "ICCP", &icc_profile);
assert(err == WEBP_MUX_OK);
RETURN_IF_ERROR("Failed to retrieve the ICC profile\n");
printf("Size of the ICC profile data: %d\n", (int)icc_profile.size);
}
if (flag & EXIF_FLAG) {
WebPData exif;
err = WebPMuxGetChunk(mux, "EXIF", &exif);
assert(err == WEBP_MUX_OK);
RETURN_IF_ERROR("Failed to retrieve the EXIF metadata\n");
printf("Size of the EXIF metadata: %d\n", (int)exif.size);
}
if (flag & XMP_FLAG) {
WebPData xmp;
err = WebPMuxGetChunk(mux, "XMP ", &xmp);
assert(err == WEBP_MUX_OK);
RETURN_IF_ERROR("Failed to retrieve the XMP metadata\n");
printf("Size of the XMP metadata: %d\n", (int)xmp.size);
}
@ -289,6 +277,9 @@ static void PrintHelp(void) {
printf("Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT\n");
printf(" webpmux -set SET_OPTIONS INPUT -o OUTPUT\n");
printf(" webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" webpmux -frgm FRAGMENT_OPTIONS [-frgm...] -o OUTPUT\n");
#endif
printf(" webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
"\n");
printf(" [-bgcolor BACKGROUND_COLOR] -o OUTPUT\n");
@ -298,39 +289,50 @@ static void PrintHelp(void) {
printf("\n");
printf("GET_OPTIONS:\n");
printf(" Extract relevant data:\n");
printf(" icc get ICC profile\n");
printf(" exif get EXIF metadata\n");
printf(" xmp get XMP metadata\n");
printf(" frame n get nth frame\n");
printf(" Extract relevant data.\n");
printf(" icc Get ICC profile.\n");
printf(" exif Get EXIF metadata.\n");
printf(" xmp Get XMP metadata.\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" frgm n Get nth fragment.\n");
#endif
printf(" frame n Get nth frame.\n");
printf("\n");
printf("SET_OPTIONS:\n");
printf(" Set color profile/metadata:\n");
printf(" icc file.icc set ICC profile\n");
printf(" exif file.exif set EXIF metadata\n");
printf(" xmp file.xmp set XMP metadata\n");
printf(" Set color profile/metadata.\n");
printf(" icc file.icc Set ICC profile.\n");
printf(" exif file.exif Set EXIF metadata.\n");
printf(" xmp file.xmp Set XMP metadata.\n");
printf(" where: 'file.icc' contains the ICC profile to be set,\n");
printf(" 'file.exif' contains the EXIF metadata to be set\n");
printf(" 'file.xmp' contains the XMP metadata to be set\n");
printf("\n");
printf("STRIP_OPTIONS:\n");
printf(" Strip color profile/metadata:\n");
printf(" icc strip ICC profile\n");
printf(" exif strip EXIF metadata\n");
printf(" xmp strip XMP metadata\n");
printf(" Strip color profile/metadata.\n");
printf(" icc Strip ICC profile.\n");
printf(" exif Strip EXIF metadata.\n");
printf(" xmp Strip XMP metadata.\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf("\n");
printf("FRAGMENT_OPTIONS(i):\n");
printf(" Create fragmented image.\n");
printf(" file_i +xi+yi\n");
printf(" where: 'file_i' is the i'th fragment (WebP format),\n");
printf(" 'xi','yi' specify the image offset for this fragment."
"\n");
#endif
printf("\n");
printf("FRAME_OPTIONS(i):\n");
printf(" Create animation:\n");
printf(" file_i +di+[xi+yi[+mi[bi]]]\n");
printf(" Create animation.\n");
printf(" file_i +di+xi+yi+mi\n");
printf(" where: 'file_i' is the i'th animation frame (WebP format),\n");
printf(" 'di' is the pause duration before next frame,\n");
printf(" 'xi','yi' specify the image offset for this frame,\n");
printf(" 'mi' is the dispose method for this frame (0 or 1),\n");
printf(" 'bi' is the blending method for this frame (+b or -b)"
"\n");
printf(" 'di' is the pause duration before next frame.\n");
printf(" 'xi','yi' specify the image offset for this frame.\n");
printf(" 'mi' is the dispose method for this frame (0 or 1).\n");
printf("\n");
printf("LOOP_COUNT:\n");
@ -345,7 +347,7 @@ static void PrintHelp(void) {
"specifying\n");
printf(" the Alpha, Red, Green and Blue component values "
"respectively\n");
printf(" [Default: 255,255,255,255]\n");
printf(" [Default: 255,255,255,255].\n");
printf("\nINPUT & OUTPUT are in WebP format.\n");
@ -353,14 +355,6 @@ static void PrintHelp(void) {
printf(" and is assumed to be\nvalid.\n");
}
static void WarnAboutOddOffset(const WebPMuxFrameInfo* const info) {
if ((info->x_offset | info->y_offset) & 1) {
fprintf(stderr, "Warning: odd offsets will be snapped to even values"
" (%d, %d) -> (%d, %d)\n", info->x_offset, info->y_offset,
info->x_offset & ~1, info->y_offset & ~1);
}
}
static int ReadFileToWebPData(const char* const filename,
WebPData* const webp_data) {
const uint8_t* data;
@ -384,9 +378,8 @@ static int CreateMux(const char* const filename, WebPMux** mux) {
static int WriteData(const char* filename, const WebPData* const webpdata) {
int ok = 0;
FILE* fout = strcmp(filename, "-") ? fopen(filename, "wb")
: ExUtilSetBinaryMode(stdout);
if (fout == NULL) {
FILE* fout = strcmp(filename, "-") ? fopen(filename, "wb") : stdout;
if (!fout) {
fprintf(stderr, "Error opening output WebP file %s!\n", filename);
return 0;
}
@ -416,44 +409,27 @@ static int WriteWebP(WebPMux* const mux, const char* filename) {
static int ParseFrameArgs(const char* args, WebPMuxFrameInfo* const info) {
int dispose_method, dummy;
char plus_minus, blend_method;
const int num_args = sscanf(args, "+%d+%d+%d+%d%c%c+%d", &info->duration,
&info->x_offset, &info->y_offset, &dispose_method,
&plus_minus, &blend_method, &dummy);
const int num_args = sscanf(args, "+%d+%d+%d+%d+%d",
&info->duration, &info->x_offset, &info->y_offset,
&dispose_method, &dummy);
switch (num_args) {
case 1:
info->x_offset = info->y_offset = 0; // fall through
case 3:
dispose_method = 0; // fall through
case 4:
plus_minus = '+';
blend_method = 'b'; // fall through
case 6:
break;
case 2:
case 5:
default:
return 0;
}
WarnAboutOddOffset(info);
// Note: The sanity of the following conversion is checked by
// WebPMuxPushFrame().
// WebPMuxSetAnimationParams().
info->dispose_method = (WebPMuxAnimDispose)dispose_method;
if (blend_method != 'b') return 0;
if (plus_minus != '-' && plus_minus != '+') return 0;
info->blend_method =
(plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
return 1;
}
static int ParseFragmentArgs(const char* args, WebPMuxFrameInfo* const info) {
const int ok =
(sscanf(args, "+%d+%d", &info->x_offset, &info->y_offset) == 2);
if (ok) WarnAboutOddOffset(info);
return ok;
return (sscanf(args, "+%d+%d", &info->x_offset, &info->y_offset) == 2);
}
static int ParseBgcolorArgs(const char* args, uint32_t* const bgcolor) {
@ -470,7 +446,7 @@ static int ParseBgcolorArgs(const char* args, uint32_t* const bgcolor) {
static void DeleteConfig(WebPMuxConfig* config) {
if (config != NULL) {
free(config->feature_.args_);
memset(config, 0, sizeof(*config));
free(config);
}
}
@ -632,6 +608,24 @@ static int ParseCommandLine(int argc, const char* argv[],
arg->params_ = argv[i + 1];
++feature_arg_index;
i += 2;
#ifdef WEBP_EXPERIMENTAL_FEATURES
} else if (!strcmp(argv[i], "-frgm")) {
CHECK_NUM_ARGS_LESS(3, ErrParse);
if (ACTION_IS_NIL || config->action_type_ == ACTION_SET) {
config->action_type_ = ACTION_SET;
} else {
ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
}
if (FEATURETYPE_IS_NIL || feature->type_ == FEATURE_FRGM) {
feature->type_ = FEATURE_FRGM;
} else {
ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
}
arg->filename_ = argv[i + 1];
arg->params_ = argv[i + 2];
++feature_arg_index;
i += 3;
#endif
} else if (!strcmp(argv[i], "-o")) {
CHECK_NUM_ARGS_LESS(2, ErrParse);
config->output_ = argv[i + 1];
@ -656,17 +650,6 @@ static int ParseCommandLine(int argc, const char* argv[],
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
DeleteConfig(config);
exit(0);
} else if (!strcmp(argv[i], "--")) {
if (i < argc - 1) {
++i;
if (config->input_ == NULL) {
config->input_ = argv[i];
} else {
ERROR_GOTO2("ERROR at '%s': Multiple input files specified.\n",
argv[i], ErrParse);
}
}
break;
} else {
ERROR_GOTO2("ERROR: Unknown option: '%s'.\n", argv[i], ErrParse);
}
@ -691,8 +674,13 @@ static int ParseCommandLine(int argc, const char* argv[],
} else {
++i;
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
} else if ((!strcmp(argv[i], "frame") ||
!strcmp(argv[i], "frgm")) &&
#else
} else if (!strcmp(argv[i], "frame") &&
(config->action_type_ == ACTION_GET)) {
#endif
(config->action_type_ == ACTION_GET)) {
CHECK_NUM_ARGS_LESS(2, ErrParse);
feature->type_ = (!strcmp(argv[i], "frame")) ? FEATURE_ANMF :
FEATURE_FRGM;
@ -750,27 +738,33 @@ static int ValidateConfig(WebPMuxConfig* config) {
// Create config object from command-line arguments.
static int InitializeConfig(int argc, const char* argv[],
WebPMuxConfig* config) {
WebPMuxConfig** config) {
int num_feature_args = 0;
int ok = 1;
assert(config != NULL);
memset(config, 0, sizeof(*config));
*config = NULL;
// Validate command-line arguments.
if (!ValidateCommandLine(argc, argv, &num_feature_args)) {
ERROR_GOTO1("Exiting due to command-line parsing error.\n", Err1);
}
config->feature_.arg_count_ = num_feature_args;
config->feature_.args_ =
(FeatureArg*)calloc(num_feature_args, sizeof(*config->feature_.args_));
if (config->feature_.args_ == NULL) {
// Allocate memory.
*config = (WebPMuxConfig*)calloc(1, sizeof(**config));
if (*config == NULL) {
ERROR_GOTO1("ERROR: Memory allocation error.\n", Err1);
}
(*config)->feature_.arg_count_ = num_feature_args;
(*config)->feature_.args_ =
(FeatureArg*)calloc(num_feature_args, sizeof(FeatureArg));
if ((*config)->feature_.args_ == NULL) {
ERROR_GOTO1("ERROR: Memory allocation error.\n", Err1);
}
// Parse command-line.
if (!ParseCommandLine(argc, argv, config) || !ValidateConfig(config)) {
if (!ParseCommandLine(argc, argv, *config) ||
!ValidateConfig(*config)) {
ERROR_GOTO1("Exiting due to command-line parsing error.\n", Err1);
}
@ -790,23 +784,21 @@ static int GetFrameFragment(const WebPMux* mux,
const WebPMuxConfig* config, int is_frame) {
WebPMuxError err = WEBP_MUX_OK;
WebPMux* mux_single = NULL;
int num = 0;
long num = 0;
int ok = 1;
int parse_error = 0;
const WebPChunkId id = is_frame ? WEBP_CHUNK_ANMF : WEBP_CHUNK_FRGM;
WebPMuxFrameInfo info;
WebPDataInit(&info.bitstream);
num = ExUtilGetInt(config->feature_.args_[0].params_, 10, &parse_error);
num = strtol(config->feature_.args_[0].params_, NULL, 10);
if (num < 0) {
ERROR_GOTO1("ERROR: Frame/Fragment index must be non-negative.\n", ErrGet);
}
if (parse_error) goto ErrGet;
err = WebPMuxGetFrame(mux, num, &info);
if (err == WEBP_MUX_OK && info.id != id) err = WEBP_MUX_NOT_FOUND;
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n",
ERROR_GOTO3("ERROR (%s): Could not get frame %ld.\n",
ErrorString(err), num, ErrGet);
}
@ -827,7 +819,7 @@ static int GetFrameFragment(const WebPMux* mux,
ErrGet:
WebPDataClear(&info.bitstream);
WebPMuxDelete(mux_single);
return ok && !parse_error;
return ok;
}
// Read and process config.
@ -889,19 +881,16 @@ static int Process(const WebPMuxConfig* config) {
break;
}
case SUBTYPE_LOOP: {
int parse_error = 0;
const int loop_count =
ExUtilGetInt(feature->args_[i].params_, 10, &parse_error);
if (loop_count < 0 || loop_count > 65535) {
const long loop_count =
strtol(feature->args_[i].params_, NULL, 10);
if (loop_count != (int)loop_count) {
// Note: This is only a 'necessary' condition for loop_count
// to be valid. The 'sufficient' conditioned in checked in
// WebPMuxSetAnimationParams() method called later.
ERROR_GOTO1("ERROR: Loop count must be in the range 0 to "
"65535.\n", Err2);
}
ok = !parse_error;
if (!ok) goto Err2;
params.loop_count = loop_count;
params.loop_count = (int)loop_count;
break;
}
case SUBTYPE_ANMF: {
@ -1001,8 +990,8 @@ static int Process(const WebPMuxConfig* config) {
ErrorString(err), kDescriptions[feature->type_], Err2);
}
} else {
ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
break;
ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
break;
}
ok = WriteWebP(mux, config->output_);
break;
@ -1028,14 +1017,14 @@ static int Process(const WebPMuxConfig* config) {
// Main.
int main(int argc, const char* argv[]) {
WebPMuxConfig config;
WebPMuxConfig* config;
int ok = InitializeConfig(argc - 1, argv + 1, &config);
if (ok) {
ok = Process(&config);
ok = Process(config);
} else {
PrintHelp();
}
DeleteConfig(&config);
DeleteConfig(config);
return !ok;
}
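For reference, the frame options parsed above ('+di+xi+yi+mi', plus the optional blend flag in the newer version) end up in a WebPMuxFrameInfo that is pushed with WebPMuxPushFrame(). A minimal sketch of assembling a two-frame animation in memory follows; MakeAnim() is an illustrative name, and the blend_method field only exists in the newer of the two versions being compared, so it is left at its zeroed default here.
#include <stdio.h>
#include <string.h>
#include "webp/mux.h"
// Assembles an animated WebP from two already-encoded WebP bitstreams.
// On success, 'output' must later be released with WebPDataClear().
static int MakeAnim(const WebPData* const frame1, const WebPData* const frame2,
                    WebPData* const output) {
  WebPMuxFrameInfo info;
  WebPMuxAnimParams params;
  WebPMux* const mux = WebPMuxNew();
  WebPMuxError err;
  if (mux == NULL) return 0;
  memset(&info, 0, sizeof(info));           // zeroed => default dispose/blend
  info.id = WEBP_CHUNK_ANMF;
  info.duration = 100;                      // 'di': 100 ms pause before next frame
  info.x_offset = info.y_offset = 0;        // 'xi'/'yi': offsets must be even
  info.dispose_method = WEBP_MUX_DISPOSE_NONE;   // 'mi' = 0
  info.bitstream = *frame1;
  err = WebPMuxPushFrame(mux, &info, 1 /*copy_data*/);
  if (err == WEBP_MUX_OK) {
    info.bitstream = *frame2;
    err = WebPMuxPushFrame(mux, &info, 1);
  }
  if (err == WEBP_MUX_OK) {
    params.bgcolor = 0xFFFFFFFF;            // A,R,G,B: opaque white canvas
    params.loop_count = 0;                  // 0 means loop forever
    err = WebPMuxSetAnimationParams(mux, &params);
  }
  if (err == WEBP_MUX_OK) err = WebPMuxAssemble(mux, output);
  WebPMuxDelete(mux);
  if (err != WEBP_MUX_OK) fprintf(stderr, "mux error %d\n", (int)err);
  return (err == WEBP_MUX_OK);
}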

View File

@ -12,12 +12,10 @@
#include "./wicdec.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#include "config.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_WINCODEC_H
#ifdef __MINGW32__
@ -27,13 +25,11 @@
#define COBJMACROS
#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#include <ole2.h> // CreateStreamOnHGlobal()
#include <shlwapi.h>
#include <windows.h>
#include <wincodec.h>
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
#define IFS(fn) \
@ -76,41 +72,10 @@ WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
0xf5c7ad2d, 0x6a8d, 0x43dd,
0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
0x1562ff7c, 0xd352, 0x46f9,
0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
static HRESULT OpenInputStream(const char* filename, IStream** stream) {
HRESULT hr = S_OK;
if (!strcmp(filename, "-")) {
const uint8_t* data = NULL;
size_t data_size = 0;
const int ok = ExUtilReadFile(filename, &data, &data_size);
if (ok) {
HGLOBAL image = GlobalAlloc(GMEM_MOVEABLE, data_size);
if (image != NULL) {
void* const image_mem = GlobalLock(image);
if (image_mem != NULL) {
memcpy(image_mem, data, data_size);
GlobalUnlock(image);
IFS(CreateStreamOnHGlobal(image, TRUE, stream));
} else {
hr = E_FAIL;
}
} else {
hr = E_OUTOFMEMORY;
}
free((void*)data);
} else {
hr = E_FAIL;
}
} else {
IFS(SHCreateStreamOnFileA(filename, STGM_READ, stream));
}
IFS(SHCreateStreamOnFileA(filename, STGM_READ, stream));
if (FAILED(hr)) {
fprintf(stderr, "Error opening input file %s (%08lx)\n", filename, hr);
}
@ -144,7 +109,6 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
IFS(IWICBitmapFrameDecode_GetColorContexts(frame,
count, color_contexts,
&num_color_contexts));
assert(FAILED(hr) || num_color_contexts <= count);
for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
WICColorContextType type;
IFS(IWICColorContext_GetType(color_contexts[i], &type));
@ -152,7 +116,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
UINT size;
IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
0, NULL, &size));
if (SUCCEEDED(hr) && size > 0) {
if (size > 0) {
iccp->bytes = (uint8_t*)malloc(size);
if (iccp->bytes == NULL) {
hr = E_OUTOFMEMORY;
@ -230,11 +194,7 @@ static int HasAlpha(IWICImagingFactory* const factory,
has_alpha = IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppBGRA_));
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_));
}
return has_alpha;
}
@ -301,7 +261,7 @@ int ReadPictureWithWIC(const char* const filename,
IFS(IWICBitmapFrameDecode_GetPixelFormat(frame, &src_pixel_format));
IFS(IWICBitmapDecoder_GetContainerFormat(decoder, &src_container_format));
if (SUCCEEDED(hr) && keep_alpha) {
if (keep_alpha) {
const GUID** guid;
for (guid = kAlphaContainers; *guid != NULL; ++guid) {
if (IsEqualGUID(MAKE_REFGUID(src_container_format),
@ -348,7 +308,6 @@ int ReadPictureWithWIC(const char* const filename,
int ok;
pic->width = width;
pic->height = height;
pic->use_argb = 1; // For WIC, we always force to argb
ok = importer->import(pic, rgb, stride);
if (!ok) hr = E_FAIL;
}

View File

@ -12,7 +12,7 @@
#ifndef WEBP_EXAMPLES_WICDEC_H_
#define WEBP_EXAMPLES_WICDEC_H_
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -21,13 +21,13 @@ struct WebPPicture;
// Reads an image from 'filename', returning the decoded output in 'pic'.
// If 'keep_alpha' is true and the image has an alpha channel, the output is
// RGBA otherwise it will be RGB. pic->use_argb is always forced to true.
// RGBA otherwise it will be RGB.
// Returns true on success.
int ReadPictureWithWIC(const char* const filename,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
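ReadPictureWithWIC() keeps a filename-based interface on either side of this diff; a minimal calling sketch, guarded by HAVE_WINCODEC_H as in cwebp (ImportViaWIC() is an illustrative name, and the Metadata helpers are assumed from metadata.h):
#ifdef HAVE_WINCODEC_H
#include <stdio.h>
#include "webp/encode.h"
#include "./metadata.h"
#include "./wicdec.h"
// Lets Windows Imaging Component pick a codec for 'filename' and imports
// the decoded image into 'pic'.
static int ImportViaWIC(const char* filename, WebPPicture* const pic) {
  Metadata metadata;
  int ok;
  if (!WebPPictureInit(pic)) return 0;
  MetadataInit(&metadata);
  ok = ReadPictureWithWIC(filename, pic, 1 /*keep_alpha*/, &metadata);
  if (!ok) fprintf(stderr, "WIC decode of '%s' failed\n", filename);
  MetadataFree(&metadata);
  return ok;
}
#endif  // HAVE_WINCODEC_H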

View File

@ -12,39 +12,28 @@
set -e
# Extract the latest SDK version from the final field of the form: iphoneosX.Y
readonly SDK=$(xcodebuild -showsdks \
declare -r SDK=$(xcodebuild -showsdks \
| grep iphoneos | sort | tail -n 1 | awk '{print substr($NF, 9)}'
)
# Extract Xcode version.
readonly XCODE=$(xcodebuild -version | grep Xcode | cut -d " " -f2)
if [[ -z "${XCODE}" ]]; then
echo "Xcode not available"
exit 1
fi
readonly OLDPATH=${PATH}
declare -r OLDPATH=${PATH}
# Add iPhoneOS-V6 to the list of platforms below if you need armv6 support.
# Note that iPhoneOS-V6 support is not available with the iOS6 SDK.
PLATFORMS="iPhoneSimulator iPhoneSimulator64"
PLATFORMS+=" iPhoneOS-V7 iPhoneOS-V7s iPhoneOS-V7-arm64"
readonly PLATFORMS
readonly SRCDIR=$(dirname $0)
readonly TOPDIR=$(pwd)
readonly BUILDDIR="${TOPDIR}/iosbuild"
readonly TARGETDIR="${TOPDIR}/WebP.framework"
readonly DEVELOPER=$(xcode-select --print-path)
readonly PLATFORMSROOT="${DEVELOPER}/Platforms"
readonly LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
declare -r PLATFORMS="iPhoneSimulator iPhoneOS-V7 iPhoneOS-V7s"
declare -r SRCDIR=$(dirname $0)
declare -r TOPDIR=$(pwd)
declare -r BUILDDIR="${TOPDIR}/iosbuild"
declare -r TARGETDIR="${TOPDIR}/WebP.framework"
declare -r DEVELOPER=$(xcode-select --print-path)
declare -r PLATFORMSROOT="${DEVELOPER}/Platforms"
declare -r LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
LIBLIST=''
if [[ -z "${SDK}" ]]; then
echo "iOS SDK not available"
exit 1
elif [[ ${SDK%%.*} -gt 8 ]]; then
EXTRA_CFLAGS="-fembed-bitcode"
elif [[ ${SDK} < 6.0 ]]; then
echo "You need iOS SDK version 6.0 or above"
elif [[ ${SDK} < 4.0 ]]; then
echo "You need iOS SDK version 4.0 or above"
exit 1
else
echo "iOS SDK Version ${SDK}"
@ -55,25 +44,10 @@ rm -rf ${TARGETDIR}
mkdir -p ${BUILDDIR}
mkdir -p ${TARGETDIR}/Headers/
if [[ ! -e ${SRCDIR}/configure ]]; then
if ! (cd ${SRCDIR} && sh autogen.sh); then
cat <<EOT
Error creating configure script!
This script requires the autoconf/automake and libtool to build. MacPorts can
be used to obtain these:
http://www.macports.org/install.php
EOT
exit 1
fi
fi
[[ -e ${SRCDIR}/configure ]] || (cd ${SRCDIR} && sh autogen.sh)
for PLATFORM in ${PLATFORMS}; do
ARCH2=""
if [[ "${PLATFORM}" == "iPhoneOS-V7-arm64" ]]; then
PLATFORM="iPhoneOS"
ARCH="aarch64"
ARCH2="arm64"
elif [[ "${PLATFORM}" == "iPhoneOS-V7s" ]]; then
if [[ "${PLATFORM}" == "iPhoneOS-V7s" ]]; then
PLATFORM="iPhoneOS"
ARCH="armv7s"
elif [[ "${PLATFORM}" == "iPhoneOS-V7" ]]; then
@ -82,9 +56,6 @@ for PLATFORM in ${PLATFORMS}; do
elif [[ "${PLATFORM}" == "iPhoneOS-V6" ]]; then
PLATFORM="iPhoneOS"
ARCH="armv6"
elif [[ "${PLATFORM}" == "iPhoneSimulator64" ]]; then
PLATFORM="iPhoneSimulator"
ARCH="x86_64"
else
ARCH="i386"
fi
@ -92,20 +63,18 @@ for PLATFORM in ${PLATFORMS}; do
ROOTDIR="${BUILDDIR}/${PLATFORM}-${SDK}-${ARCH}"
mkdir -p "${ROOTDIR}"
DEVROOT="${DEVELOPER}/Toolchains/XcodeDefault.xctoolchain"
SDKROOT="${PLATFORMSROOT}/"
SDKROOT+="${PLATFORM}.platform/Developer/SDKs/${PLATFORM}${SDK}.sdk/"
CFLAGS="-arch ${ARCH2:-${ARCH}} -pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
CFLAGS+=" -miphoneos-version-min=6.0 ${EXTRA_CFLAGS}"
export DEVROOT="${PLATFORMSROOT}/${PLATFORM}.platform/Developer"
export SDKROOT="${DEVROOT}/SDKs/${PLATFORM}${SDK}.sdk"
set -x
export CFLAGS="-arch ${ARCH} -pipe -isysroot ${SDKROOT}"
export CXXFLAGS=${CFLAGS}
export LDFLAGS="-arch ${ARCH} -pipe -isysroot ${SDKROOT}"
export PATH="${DEVROOT}/usr/bin:${OLDPATH}"
${SRCDIR}/configure --host=${ARCH}-apple-darwin --prefix=${ROOTDIR} \
--build=$(${SRCDIR}/config.guess) \
--disable-shared --enable-static \
--enable-libwebpdecoder --enable-swap-16bit-csp \
CFLAGS="${CFLAGS}"
set +x
--enable-libwebpdecoder --enable-swap-16bit-csp
# run make only in the src/ directory to create libwebpdecoder.a
cd src/
@ -120,5 +89,5 @@ for PLATFORM in ${PLATFORMS}; do
export PATH=${OLDPATH}
done
cp -a ${SRCDIR}/src/webp/*.h ${TARGETDIR}/Headers/
cp -a ${SRCDIR}/src/webp/* ${TARGETDIR}/Headers/
${LIPO} -create ${LIBLIST} -output ${TARGETDIR}/WebP

View File

@ -82,7 +82,7 @@
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 21
#serial 18
AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
AC_DEFUN([AX_PTHREAD], [
@ -103,8 +103,8 @@ if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
save_LIBS="$LIBS"
LIBS="$PTHREAD_LIBS $LIBS"
AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes])
AC_MSG_RESULT([$ax_pthread_ok])
AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes)
AC_MSG_RESULT($ax_pthread_ok)
if test x"$ax_pthread_ok" = xno; then
PTHREAD_LIBS=""
PTHREAD_CFLAGS=""
@ -164,20 +164,6 @@ case ${host_os} in
;;
esac
# Clang doesn't consider unrecognized options an error unless we specify
# -Werror. We throw in some extra Clang-specific options to ensure that
# this doesn't happen for GCC, which also accepts -Werror.
AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags])
save_CFLAGS="$CFLAGS"
ax_pthread_extra_flags="-Werror"
CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])],
[AC_MSG_RESULT([yes])],
[ax_pthread_extra_flags=
AC_MSG_RESULT([no])])
CFLAGS="$save_CFLAGS"
if test x"$ax_pthread_ok" = xno; then
for flag in $ax_pthread_flags; do
@ -192,7 +178,7 @@ for flag in $ax_pthread_flags; do
;;
pthread-config)
AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no)
if test x"$ax_pthread_config" = xno; then continue; fi
PTHREAD_CFLAGS="`pthread-config --cflags`"
PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
@ -207,7 +193,7 @@ for flag in $ax_pthread_flags; do
save_LIBS="$LIBS"
save_CFLAGS="$CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags"
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
# Check for various functions. We must include pthread.h,
# since some functions may be macros. (On the Sequent, we
@ -233,7 +219,7 @@ for flag in $ax_pthread_flags; do
LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS"
AC_MSG_RESULT([$ax_pthread_ok])
AC_MSG_RESULT($ax_pthread_ok)
if test "x$ax_pthread_ok" = xyes; then
break;
fi
@ -259,9 +245,9 @@ if test "x$ax_pthread_ok" = xyes; then
[attr_name=$attr; break],
[])
done
AC_MSG_RESULT([$attr_name])
AC_MSG_RESULT($attr_name)
if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name],
AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
[Define to necessary symbol if this constant
uses a non-standard name on your system.])
fi
@ -275,54 +261,45 @@ if test "x$ax_pthread_ok" = xyes; then
if test "$GCC" = "yes"; then
flag="-D_REENTRANT"
else
# TODO: What about Clang on Solaris?
flag="-mt -D_REENTRANT"
fi
;;
esac
AC_MSG_RESULT([$flag])
AC_MSG_RESULT(${flag})
if test "x$flag" != xno; then
PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
fi
AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
[ax_cv_PTHREAD_PRIO_INHERIT], [
AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
[[int i = PTHREAD_PRIO_INHERIT;]])],
ax_cv_PTHREAD_PRIO_INHERIT, [
AC_LINK_IFELSE([
AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])],
[ax_cv_PTHREAD_PRIO_INHERIT=yes],
[ax_cv_PTHREAD_PRIO_INHERIT=no])
])
AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"],
[AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])])
AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.]))
LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS"
# More AIX lossage: compile with *_r variant
if test "x$GCC" != xyes; then
case $host_os in
aix*)
AS_CASE(["x/$CC"],
[x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
[#handle absolute path differently from PATH based program lookup
AS_CASE(["x$CC"],
[x/*],
[AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])],
[AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])])
;;
esac
# More AIX lossage: must compile with xlc_r or cc_r
if test x"$GCC" != xyes; then
AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC})
else
PTHREAD_CC=$CC
fi
else
PTHREAD_CC="$CC"
fi
test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
AC_SUBST([PTHREAD_LIBS])
AC_SUBST([PTHREAD_CFLAGS])
AC_SUBST([PTHREAD_CC])
AC_SUBST(PTHREAD_LIBS)
AC_SUBST(PTHREAD_CFLAGS)
AC_SUBST(PTHREAD_CC)
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$ax_pthread_ok" = xyes; then
ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
:
else
ax_pthread_ok=no

View File

@ -2,8 +2,8 @@
# system, for simple local building of the libraries and tools.
# It will not install the libraries system-wide, but just create the 'cwebp'
# and 'dwebp' tools in the examples/ directory, along with the static
# libraries 'src/libwebp.a', 'src/libwebpdecoder.a', 'src/mux/libwebpmux.a',
# 'src/demux/libwebpdemux.a' and 'src/libwebpextras.a'.
# libraries 'src/libwebp.a', 'src/libwebpdecoder.a', 'src/mux/libwebpmux.a' and
# 'src/demux/libwebpdemux.a'.
#
# To build the library and examples, use:
# make -f makefile.unix
@ -61,55 +61,33 @@ endif
EXTRA_FLAGS += -DWEBP_USE_THREAD
EXTRA_LIBS += -lpthread
# Control symbol visibility. Comment out if your compiler doesn't support it.
EXTRA_FLAGS += -fvisibility=hidden
# Extra flags to emulate C89 strictness with the full ANSI
EXTRA_FLAGS += -Wextra -Wold-style-definition
EXTRA_FLAGS += -Wmissing-prototypes
EXTRA_FLAGS += -Wmissing-declarations
EXTRA_FLAGS += -Wdeclaration-after-statement
EXTRA_FLAGS += -Wshadow
EXTRA_FLAGS += -Wformat-security -Wformat-nonliteral
# EXTRA_FLAGS += -Wvla
# SSE4.1-specific flags:
ifeq ($(HAVE_SSE41), 1)
EXTRA_FLAGS += -DWEBP_HAVE_SSE41
src/dsp/%_sse41.o: EXTRA_FLAGS += -msse4.1
endif
# AVX2-specific flags:
ifeq ($(HAVE_AVX2), 1)
EXTRA_FLAGS += -DWEBP_HAVE_AVX2
src/dsp/%_avx2.o: EXTRA_FLAGS += -mavx2
endif
# NEON-specific flags:
# EXTRA_FLAGS += -march=armv7-a -mfloat-abi=hard -mfpu=neon -mtune=cortex-a8
# -> seems to make the overall lib slower: -fno-split-wide-types
#### Nothing should normally be changed below this line ####
AR = ar
ARFLAGS = r
CC = gcc
CPPFLAGS = -Isrc/ -Wall
CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
CC = gcc
INSTALL = install
GROFF = /usr/bin/groff
COL = /usr/bin/col
LDFLAGS = $(EXTRA_LIBS) $(EXTRA_FLAGS) -lm
ANIM_UTIL_OBJS = \
examples/anim_util.o \
DEC_OBJS = \
src/dec/alpha.o \
src/dec/buffer.o \
src/dec/frame.o \
src/dec/idec.o \
src/dec/io.o \
src/dec/layer.o \
src/dec/quant.o \
src/dec/tree.o \
src/dec/vp8.o \
@ -117,64 +95,23 @@ DEC_OBJS = \
src/dec/webp.o \
DEMUX_OBJS = \
src/demux/anim_decode.o \
src/demux/demux.o \
DSP_DEC_OBJS = \
src/dsp/alpha_processing.o \
src/dsp/alpha_processing_mips_dsp_r2.o \
src/dsp/alpha_processing_sse2.o \
src/dsp/alpha_processing_sse41.o \
src/dsp/cpu.o \
src/dsp/dec.o \
src/dsp/dec_clip_tables.o \
src/dsp/dec_mips32.o \
src/dsp/dec_mips_dsp_r2.o \
src/dsp/dec_neon.o \
src/dsp/dec_sse2.o \
src/dsp/dec_sse41.o \
src/dsp/filters.o \
src/dsp/filters_mips_dsp_r2.o \
src/dsp/filters_sse2.o \
src/dsp/lossless.o \
src/dsp/lossless_mips_dsp_r2.o \
src/dsp/lossless_neon.o \
src/dsp/lossless_sse2.o \
src/dsp/rescaler.o \
src/dsp/rescaler_mips32.o \
src/dsp/rescaler_mips_dsp_r2.o \
src/dsp/rescaler_neon.o \
src/dsp/rescaler_sse2.o \
src/dsp/upsampling.o \
src/dsp/upsampling_mips_dsp_r2.o \
src/dsp/upsampling_neon.o \
src/dsp/upsampling_sse2.o \
src/dsp/yuv.o \
src/dsp/yuv_mips32.o \
src/dsp/yuv_mips_dsp_r2.o \
src/dsp/yuv_sse2.o \
DSP_ENC_OBJS = \
src/dsp/argb.o \
src/dsp/argb_mips_dsp_r2.o \
src/dsp/argb_sse2.o \
src/dsp/cost.o \
src/dsp/cost_mips32.o \
src/dsp/cost_mips_dsp_r2.o \
src/dsp/cost_sse2.o \
src/dsp/enc.o \
src/dsp/enc_avx2.o \
src/dsp/enc_mips32.o \
src/dsp/enc_mips_dsp_r2.o \
src/dsp/enc_neon.o \
src/dsp/enc_sse2.o \
src/dsp/enc_sse41.o \
src/dsp/lossless_enc.o \
src/dsp/lossless_enc_mips32.o \
src/dsp/lossless_enc_mips_dsp_r2.o \
src/dsp/lossless_enc_neon.o \
src/dsp/lossless_enc_sse2.o \
src/dsp/lossless_enc_sse41.o \
ENC_OBJS = \
src/enc/alpha.o \
@ -182,17 +119,12 @@ ENC_OBJS = \
src/enc/backward_references.o \
src/enc/config.o \
src/enc/cost.o \
src/enc/delta_palettization.o \
src/enc/filter.o \
src/enc/frame.o \
src/enc/histogram.o \
src/enc/iterator.o \
src/enc/near_lossless.o \
src/enc/layer.o \
src/enc/picture.o \
src/enc/picture_csp.o \
src/enc/picture_psnr.o \
src/enc/picture_rescale.o \
src/enc/picture_tools.o \
src/enc/quant.o \
src/enc/syntax.o \
src/enc/token.o \
@ -205,16 +137,11 @@ EX_FORMAT_DEC_OBJS = \
examples/metadata.o \
examples/pngdec.o \
examples/tiffdec.o \
examples/webpdec.o \
EX_UTIL_OBJS = \
examples/example_util.o \
GIFDEC_OBJS = \
examples/gifdec.o \
MUX_OBJS = \
src/mux/anim_encode.o \
src/mux/muxedit.o \
src/mux/muxinternal.o \
src/mux/muxread.o \
@ -225,7 +152,6 @@ UTILS_DEC_OBJS = \
src/utils/filters.o \
src/utils/huffman.o \
src/utils/quant_levels_dec.o \
src/utils/random.o \
src/utils/rescaler.o \
src/utils/thread.o \
src/utils/utils.o \
@ -235,15 +161,11 @@ UTILS_ENC_OBJS = \
src/utils/huffman_encode.o \
src/utils/quant_levels.o \
EXTRA_OBJS = \
src/extras/extras.o \
LIBWEBPDECODER_OBJS = $(DEC_OBJS) $(DSP_DEC_OBJS) $(UTILS_DEC_OBJS)
LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) $(DSP_ENC_OBJS) \
$(UTILS_ENC_OBJS)
LIBWEBPMUX_OBJS = $(MUX_OBJS)
LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS)
LIBWEBPEXTRA_OBJS = $(EXTRA_OBJS)
HDRS_INSTALLED = \
src/webp/decode.h \
@ -254,107 +176,70 @@ HDRS_INSTALLED = \
src/webp/types.h \
HDRS = \
src/dec/alphai.h \
src/dec/common.h \
src/dec/decode_vp8.h \
src/dec/vp8i.h \
src/dec/vp8li.h \
src/dec/webpi.h \
src/dsp/dsp.h \
src/dsp/lossless.h \
src/dsp/mips_macro.h \
src/dsp/neon.h \
src/dsp/yuv.h \
src/enc/backward_references.h \
src/enc/cost.h \
src/enc/delta_palettization.h \
src/enc/histogram.h \
src/enc/vp8enci.h \
src/enc/vp8li.h \
src/mux/muxi.h \
src/utils/bit_reader.h \
src/utils/bit_reader_inl.h \
src/utils/bit_writer.h \
src/utils/color_cache.h \
src/utils/endian_inl.h \
src/utils/filters.h \
src/utils/huffman.h \
src/utils/huffman_encode.h \
src/utils/quant_levels.h \
src/utils/quant_levels_dec.h \
src/utils/random.h \
src/utils/rescaler.h \
src/utils/thread.h \
src/utils/utils.h \
src/webp/format_constants.h \
$(HDRS_INSTALLED) \
OUT_LIBS = examples/libexample_util.a src/libwebpdecoder.a src/libwebp.a
EXTRA_LIB = src/libwebpextras.a
OUT_EXAMPLES = examples/cwebp examples/dwebp
EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux \
examples/anim_diff
EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux
OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
ifeq ($(MAKECMDGOALS),clean)
OUTPUT += $(EXTRA_EXAMPLES)
OUTPUT += src/demux/libwebpdemux.a src/mux/libwebpmux.a $(EXTRA_LIB)
OUTPUT += examples/libgifdec.a examples/libanim_util.a
OUTPUT += src/demux/libwebpdemux.a src/mux/libwebpmux.a
endif
ex: $(OUT_EXAMPLES)
all: ex $(EXTRA_EXAMPLES)
extras: $(EXTRA_LIB)
$(EX_FORMAT_DEC_OBJS): %.o: %.h
# special dependencies:
# tree.c/vp8.c/bit_reader.c <-> bit_reader_inl.h, endian_inl.h
# bit_writer.c <-> endian_inl.h
src/dec/tree.o: src/utils/bit_reader_inl.h src/utils/endian_inl.h
src/dec/vp8.o: src/utils/bit_reader_inl.h src/utils/endian_inl.h
src/utils/bit_reader.o: src/utils/bit_reader_inl.h src/utils/endian_inl.h
src/utils/bit_writer.o: src/utils/endian_inl.h
%.o: %.c $(HDRS)
$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
examples/libanim_util.a: $(ANIM_UTIL_OBJS)
examples/libexample_util.a: $(EX_UTIL_OBJS)
examples/libgifdec.a: $(GIFDEC_OBJS)
src/libwebpdecoder.a: $(LIBWEBPDECODER_OBJS)
src/libwebp.a: $(LIBWEBP_OBJS)
src/mux/libwebpmux.a: $(LIBWEBPMUX_OBJS)
src/demux/libwebpdemux.a: $(LIBWEBPDEMUX_OBJS)
src/libwebpextras.a: $(LIBWEBPEXTRA_OBJS)
%.a:
$(AR) $(ARFLAGS) $@ $^
examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
examples/cwebp: examples/cwebp.o $(EX_FORMAT_DEC_OBJS)
examples/dwebp: examples/dwebp.o
examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
examples/gif2webp: examples/gif2webp.o
examples/vwebp: examples/vwebp.o
examples/webpmux: examples/webpmux.o
examples/anim_diff: examples/libanim_util.a examples/libgifdec.a
examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
examples/anim_diff: src/libwebp.a
examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
examples/cwebp: examples/libexample_util.a src/libwebp.a
examples/cwebp: src/libwebp.a
examples/cwebp: EXTRA_LIBS += $(CWEBP_LIBS)
examples/dwebp: examples/libexample_util.a src/libwebpdecoder.a
examples/dwebp: EXTRA_LIBS += $(DWEBP_LIBS)
examples/gif2webp: examples/libexample_util.a examples/libgifdec.a
examples/gif2webp: src/mux/libwebpmux.a src/libwebp.a
examples/gif2webp: examples/libexample_util.a src/mux/libwebpmux.a src/libwebp.a
examples/gif2webp: EXTRA_LIBS += $(GIF_LIBS)
examples/gif2webp: EXTRA_FLAGS += -DWEBP_HAVE_GIF
examples/vwebp: examples/libexample_util.a src/demux/libwebpdemux.a
examples/vwebp: src/libwebp.a
examples/vwebp: EXTRA_LIBS += $(GL_LIBS)
examples/vwebp: EXTRA_FLAGS += -DWEBP_HAVE_GL
examples/webpmux: examples/libexample_util.a src/mux/libwebpmux.a
examples/webpmux: src/libwebpdecoder.a
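As a quick sanity check of the archives these rules produce, a minimal consumer of src/libwebpdecoder.a could look like the sketch below. The file name and error handling are illustrative; only the public WebPGetInfo()/WebPDecodeRGBA() entry points are assumed.

    // decode_check.c -- hypothetical smoke test linked against libwebpdecoder.a
    #include <stdio.h>
    #include <stdlib.h>
    #include "webp/decode.h"

    int main(int argc, char* argv[]) {
      FILE* f;
      long file_size;
      uint8_t* data;
      uint8_t* rgba;
      int width = 0, height = 0;

      if (argc < 2) return 1;
      f = fopen(argv[1], "rb");
      if (f == NULL) return 1;
      fseek(f, 0, SEEK_END);
      file_size = ftell(f);
      fseek(f, 0, SEEK_SET);
      data = (uint8_t*)malloc((size_t)file_size);
      if (data == NULL || fread(data, (size_t)file_size, 1, f) != 1) return 1;
      fclose(f);

      if (!WebPGetInfo(data, (size_t)file_size, &width, &height)) {
        fprintf(stderr, "not a WebP bitstream\n");
        return 1;
      }
      rgba = WebPDecodeRGBA(data, (size_t)file_size, &width, &height);
      printf("%s: %d x %d (%s)\n", argv[1], width, height,
             (rgba != NULL) ? "decoded" : "decode failed");
      free(rgba);   // buffers returned by WebPDecode*() are heap-allocated
      free(data);
      return (rgba != NULL) ? 0 : 1;
    }

Building it against the tree would be roughly `cc -Isrc decode_check.c src/libwebpdecoder.a -o decode_check`, plus whatever thread or math libraries the chosen CFLAGS pulled in.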
@ -365,14 +250,14 @@ dist: DESTDIR := dist
dist: OUT_EXAMPLES += $(EXTRA_EXAMPLES)
dist: all
$(INSTALL) -m755 -d $(DESTDIR)/include/webp \
$(DESTDIR)/bin $(DESTDIR)/doc $(DESTDIR)/lib
$(INSTALL) -m755 -s $(OUT_EXAMPLES) $(DESTDIR)/bin
$(DESTDIR)/doc $(DESTDIR)/lib
$(INSTALL) -m755 -s $(OUT_EXAMPLES) $(DESTDIR)
$(INSTALL) -m644 $(HDRS_INSTALLED) $(DESTDIR)/include/webp
$(INSTALL) -m644 src/libwebp.a $(DESTDIR)/lib
$(INSTALL) -m644 src/demux/libwebpdemux.a $(DESTDIR)/lib
$(INSTALL) -m644 src/mux/libwebpmux.a $(DESTDIR)/lib
umask 022; \
for m in man/[cdv]webp.1 man/gif2webp.1 man/webpmux.1; do \
for m in man/[cd]webp.1 man/gif2webp.1 man/webpmux.1; do \
basenam=$$(basename $$m .1); \
$(GROFF) -t -e -man -T utf8 $$m \
| $(COL) -bx >$(DESTDIR)/doc/$${basenam}.txt; \
@ -387,10 +272,23 @@ clean:
src/demux/*.o src/demux/*~ \
src/dsp/*.o src/dsp/*~ \
src/enc/*.o src/enc/*~ \
src/extras/*.o src/extras/*~ \
src/mux/*.o src/mux/*~ \
src/utils/*.o src/utils/*~ \
src/webp/*~ man/*~ doc/*~ swig/*~ \
.PHONY: all clean dist ex
superclean: clean
$(RM) -r .git *.log *.cache *~
$(RM) -r .deps */.deps */*/.deps
$(RM) -r .libs */.libs */*/.libs
$(RM) */*.lo */*/*.lo
$(RM) */*.la */*/*.la
$(RM) Makefile */Makefile */*/Makefile
$(RM) Makefile.in */Makefile.in */*/Makefile.in
$(RM) config.log autom4te.cache libtool config.h stamp-h1
$(RM) aclocal.m4 compile
$(RM) config.guess config.h.in config.sub config.status
$(RM) configure depcomp install-sh ltmain.sh missing src/libwebp.pc
$(RM) m4/*
.PHONY: all clean dist ex superclean
.SUFFIXES:
@ -5,7 +5,4 @@ endif
if BUILD_GIF2WEBP
man_MANS += gif2webp.1
endif
if BUILD_VWEBP
man_MANS += vwebp.1
endif
EXTRA_DIST = $(man_MANS)
@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH CWEBP 1 "December 14, 2015"
.TH CWEBP 1 "March 13, 2013"
.SH NAME
cwebp \- compress an image file to a WebP file
.SH SYNOPSIS
@ -12,19 +12,13 @@ This manual page documents the
command.
.PP
\fBcwebp\fP compresses an image using the WebP format.
Input format can be either PNG, JPEG, TIFF, WebP or raw Y'CbCr samples.
Input format can be either PNG, JPEG, TIFF or raw Y'CbCr samples.
.SH OPTIONS
The basic options are:
.TP
.BI \-o " string
Specify the name of the output WebP file. If omitted, \fBcwebp\fP will
perform compression but only report statistics.
Using "\-" as output name will direct output to 'stdout'.
.TP
.BI \-\- " string
Explicitly specify the input file. This option is useful if the input
file starts with a '\-' for instance. This option must appear \fBlast\fP.
Any other options afterward will be ignored.
.TP
.B \-h, \-help
A short usage summary.
@ -35,17 +29,6 @@ A summary of all the possible options.
.B \-version
Print the version number (as major.minor.revision) and exit.
.TP
.B \-lossless
Encode the image without any loss. For images with fully transparent area,
the invisible pixel values (R/G/B or Y/U/V) will be preserved only if the
\-exact option is used.
.TP
.BI \-near_lossless " int
Use near\-lossless image preprocessing. This option adjusts pixel values
to help compressibility, but has minimal impact on the visual quality.
It triggers lossless compression mode automatically.
Range is 0 (maximum preprocessing) to 100 (no preprocessing, the default).
.TP
.BI \-q " float
Specify the compression factor for RGB channels between 0 and 100. The default
is 75.
@ -53,52 +36,54 @@ is 75.
In case of lossy compression (default), a small factor produces a smaller file
with lower quality. Best quality is achieved by using a value of 100.
.br
In case of lossless compression (specified by the \fB\-lossless\fP option), a
small factor enables faster compression speed, but produces a larger file.
Maximum compression is achieved by using a value of 100.
.TP
.BI \-z " int
Switch on \fBlossless\fP compression mode with the specified level between 0
and 9, with level 0 being the fastest, 9 being the slowest. Fast mode
produces larger file size than slower ones. A good default is \fB\-z 6\fP.
This option is actually a shortcut for some predefined settings for quality
and method. If options \fB\-q\fP or \fB\-m\fP are subsequently used, they will
invalidate the effect of this option.
In case of lossless compression (specified by the \-lossless option), a small
factor enables faster compression speed, but produces a larger file. Maximum
compression is achieved by using a value of 100.
.TP
.BI \-alpha_q " int
Specify the compression factor for alpha compression between 0 and 100.
Lossless compression of alpha is achieved using a value of 100, while the lower
values result in a lossy compression. The default is 100.
.TP
.BI \-f " int
Specify the strength of the deblocking filter, between 0 (no filtering)
and 100 (maximum filtering). A value of 0 will turn off any filtering.
Higher value will increase the strength of the filtering process applied
after decoding the picture. The higher the value the smoother the picture will
appear. Typical values are usually in the range of 20 to 50.
.TP
.BI \-preset " string
Specify a set of pre\-defined parameters to suit a particular type of
Specify a set of pre-defined parameters to suit a particular type of
source material. Possible values are: \fBdefault\fP, \fBphoto\fP,
\fBpicture\fP, \fBdrawing\fP, \fBicon\fP, \fBtext\fP. Since
\fB\-preset\fP overwrites the other parameters' values (except the
\fB\-q\fP one), this option should preferably appear first in the
order of the arguments.
.TP
.BI \-sns " int
Specify the amplitude of the spatial noise shaping. Spatial noise shaping
(or \fBsns\fP for short) refers to a general collection of built-in algorithms
used to decide which area of the picture should use relatively less bits,
and where else to better transfer these bits. The possible range goes from
0 (algorithm is off) to 100 (the maximal effect). The default value is 80.
.TP
.BI \-m " int
Specify the compression method to use. This parameter controls the
trade off between encoding speed and the compressed file size and quality.
Possible values range from 0 to 6. Default value is 4.
When higher values are used, the encoder will spend more time inspecting
additional encoding possibilities and decide on the quality gain.
Lower value can result in faster processing time at the expense of
Lower value can result is faster processing time at the expense of
larger file size and lower compression quality.
.TP
.BI \-resize " width height
Resize the source to a rectangle with size \fBwidth\fP x \fBheight\fP.
If either (but not both) of the \fBwidth\fP or \fBheight\fP parameters is 0,
the value will be calculated preserving the aspect\-ratio.
.TP
.BI \-crop " x_position y_position width height
Crop the source to a rectangle with top\-left corner at coordinates
(\fBx_position\fP, \fBy_position\fP) and size \fBwidth\fP x \fBheight\fP.
This cropping area must be fully contained within the source rectangle.
.B \-jpeg_like
Change the internal parameter mapping to better match the expected size
of JPEG compression. This flag will generally produce an output file of
similar size to its JPEG equivalent (for the same \fB\-q\fP setting), but
with less visual distortion.
.TP
.B \-mt
Use multi\-threading for encoding, if possible. This option is only effective
Use multi-threading for encoding, if possible. This option is only effective
when using lossy compression on a source with a transparency channel.
.TP
.B \-low_memory
@ -109,46 +94,13 @@ different in size and distortion. This flag is only effective for methods
some side effects on the bitstream: it forces certain bitstream features
like number of partitions (forced to 1). Note that a more detailed report
of bitstream size is printed by \fBcwebp\fP when using this option.
.SS LOSSY OPTIONS
These options are only effective when doing lossy encoding (the default, with
or without alpha).
.TP
.BI \-size " int
Specify a target size (in bytes) to try and reach for the compressed output.
Compressor will make several passes of partial encoding in order to get as
close as possible to this target.
.TP
.BI \-psnr " float
Specify a target PSNR (in dB) to try and reach for the compressed output.
Compressor will make several passes of partial encoding in order to get as
close as possible to this target.
.TP
.BI \-pass " int
Set a maximum number of passes to use during the dichotomy used by
options \fB\-size\fP or \fB\-psnr\fP. Maximum value is 10, default is 1.
.TP
.B \-af
Turns auto\-filter on. This algorithm will spend additional time optimizing
the filtering strength to reach a well\-balanced quality.
.TP
.B \-jpeg_like
Change the internal parameter mapping to better match the expected size
of JPEG compression. This flag will generally produce an output file of
similar size to its JPEG equivalent (for the same \fB\-q\fP setting), but
with less visual distortion.
Turns auto-filter on. This algorithm will spend additional time optimizing
the filtering strength to reach a well-balanced quality.
.TP
Advanced options:
.TP
.BI \-f " int
Specify the strength of the deblocking filter, between 0 (no filtering)
and 100 (maximum filtering). A value of 0 will turn off any filtering.
Higher value will increase the strength of the filtering process applied
after decoding the picture. The higher the value the smoother the picture will
appear. Typical values are usually in the range of 20 to 50.
.SH ADDITIONAL OPTIONS
More advanced options are:
.TP
.BI \-sharpness " int
Specify the sharpness of the filtering (if used).
@ -162,13 +114,6 @@ Use strong filtering (if filtering is being used thanks to the
Disable strong filtering (if filtering is being used thanks to the
\fB\-f\fP option) and use simple filtering instead.
.TP
.BI \-sns " int
Specify the amplitude of the spatial noise shaping. Spatial noise shaping
(or \fBsns\fP for short) refers to a general collection of built\-in algorithms
used to decide which area of the picture should use relatively less bits,
and where else to better transfer these bits. The possible range goes from
0 (algorithm is off) to 100 (the maximal effect). The default value is 80.
.TP
.BI \-segments " int
Change the number of partitions to use during the segmentation of the
sns algorithm. Segments should be in range 1 to 4. Default value is 4.
@ -178,30 +123,96 @@ is used.
.BI \-partition_limit " int
Degrade quality by limiting the number of bits used by some macroblocks.
Range is 0 (no degradation, the default) to 100 (full degradation).
Useful values are usually around 30\-70 for moderately large images.
In the VP8 format, the so\-called control partition has a limit of 512k and
Useful values are usually around 30-70 for moderately large images.
In the VP8 format, the so-called control partition has a limit of 512k and
is used to store the following information: whether the macroblock is skipped,
which segment it belongs to, whether it is coded as intra 4x4 or intra 16x16
mode, and finally the prediction modes to use for each of the sub\-blocks.
mode, and finally the prediction modes to use for each of the sub-blocks.
For a very large image, 512k leaves room for only a few bits per 16x16 macroblock.
The absolute minimum is 4 bits per macroblock. Skip, segment, and mode
information can use up almost all of these 4 bits (although the case is unlikely),
which is problematic for very large images. The partition_limit factor controls
how frequently the most bit\-costly mode (intra 4x4) will be used. This is
how frequently the most bit-costly mode (intra 4x4) will be used. This is
useful in case the 512k limit is reached and the following message is displayed:
\fIError code: 6 (PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k)\fP.
If using \fB\-partition_limit\fP is not enough to meet the 512k constraint, one
If using \fB-partition_limit\fP is not enough to meet the 512k constraint, one
should use fewer segments in order to save more header bits per macroblock.
See the \fB\-segments\fP option.
See the \fB-segments\fP option.
.TP
.BI \-size " int
Specify a target size (in bytes) to try and reach for the compressed output.
Compressor will make several passes of partial encoding in order to get as
close as possible to this target.
.TP
.BI \-psnr " float
Specify a target PSNR (in dB) to try and reach for the compressed output.
Compressor will make several passes of partial encoding in order to get as
close as possible to this target.
.TP
.BI \-pass " int
Set a maximum number of passes to use during the dichotomy used by
options \fB\-size\fP or \fB\-psnr\fP. Maximum value is 10.
.TP
.BI \-crop " x_position y_position width height
Crop the source to a rectangle with top-left corner at coordinates
(\fBx_position\fP, \fBy_position\fP) and size \fBwidth\fP x \fBheight\fP.
This cropping area must be fully contained within the source rectangle.
.TP
.BI \-s " width height
Specify that the input file actually consists of raw Y'CbCr samples following
the ITU-R BT.601 recommendation, in 4:2:0 linear format.
The luma plane has size \fBwidth\fP x \fBheight\fP.
.TP
.BI \-map " int
Output additional ASCII-map of encoding information. Possible map values
range from 1 to 6. This is only meant to help debugging.
.TP
.BI \-pre " int
Specify a pre-processing filter. This option is a placeholder
and has currently no effect.
.TP
.BI \-alpha_filter " string
Specify the predictive filtering method for the alpha plane. One of 'none',
\&'fast' or 'best', in increasing complexity and slowness order. Default is
\&'fast'. Internally, alpha filtering is performed using four possible
predictions (none, horizontal, vertical, gradient). The 'best' mode will try
each mode in turn and pick the one which gives the smaller size. The 'fast'
mode will just try to form an a-priori guess without testing all modes.
.TP
.BI \-alpha_method " int
Specify the algorithm used for alpha compression: 0 or 1. Algorithm 0 denotes
no compression, 1 uses WebP lossless format for compression. The default is 1.
.TP
.B \-alpha_cleanup
Modify unseen RGB values under fully transparent area, to help compressibility.
The default is off.
.TP
.B \-noalpha
Using this option will discard the alpha channel.
.TP
.B \-lossless
Encode the image without any loss.
.TP
.BI \-hint " string
Specify the hint about input image type. Possible values are:
\fBphoto\fP, \fBpicture\fP or \fBgraph\fP.
.TP
.BI \-metadata " string
A comma separated list of metadata to copy from the input to the output if
present.
Valid values: \fBall\fP, \fBnone\fP, \fBexif\fP, \fBicc\fP, \fBxmp\fP.
The default is \fBnone\fP.
.SS LOGGING OPTIONS
These options control the level of output:
Note: each input format may not support all combinations.
.TP
.B \-noasm
Disable all assembly optimizations.
.TP
.B \-v
Print extra information (encoding time in particular).
.TP
.B \-print_psnr
Compute and report average PSNR (Peak\-Signal\-To\-Noise ratio).
Compute and report average PSNR (Peak-Signal-To-Noise ratio).
.TP
.B \-print_ssim
Compute and report average SSIM (structural similarity
@ -219,69 +230,13 @@ Do not print anything.
.TP
.B \-short
Only print brief information (output file size and PSNR) for testing purpose.
.TP
.BI \-map " int
Output additional ASCII\-map of encoding information. Possible map values
range from 1 to 6. This is only meant to help debugging.
.SS ADDITIONAL OPTIONS
More advanced options are:
.TP
.BI \-s " width height
Specify that the input file actually consists of raw Y'CbCr samples following
the ITU\-R BT.601 recommendation, in 4:2:0 linear format.
The luma plane has size \fBwidth\fP x \fBheight\fP.
.TP
.BI \-pre " int
Specify some preprocessing steps. Using a value of '2' will trigger
quality\-dependent pseudo\-random dithering during RGBA\->YUVA conversion
(lossy compression only).
.TP
.BI \-alpha_filter " string
Specify the predictive filtering method for the alpha plane. One of 'none',
\&'fast' or 'best', in increasing complexity and slowness order. Default is
\&'fast'. Internally, alpha filtering is performed using four possible
predictions (none, horizontal, vertical, gradient). The 'best' mode will try
each mode in turn and pick the one which gives the smaller size. The 'fast'
mode will just try to form an a priori guess without testing all modes.
.TP
.BI \-alpha_method " int
Specify the algorithm used for alpha compression: 0 or 1. Algorithm 0 denotes
no compression, 1 uses WebP lossless format for compression. The default is 1.
.TP
.B \-exact
Preserve RGB values in transparent area. The default is off, to help
compressibility.
.TP
.BI \-blend_alpha " int
This option blends the alpha channel (if present) with the source using the
background color specified in hexadecimal as 0xrrggbb. The alpha channel is
afterward reset to the opaque value 255.
.TP
.B \-noalpha
Using this option will discard the alpha channel.
.TP
.BI \-hint " string
Specify the hint about input image type. Possible values are:
\fBphoto\fP, \fBpicture\fP or \fBgraph\fP.
.TP
.BI \-metadata " string
A comma separated list of metadata to copy from the input to the output if
present.
Valid values: \fBall\fP, \fBnone\fP, \fBexif\fP, \fBicc\fP, \fBxmp\fP.
The default is \fBnone\fP.
Note: each input format may not support all combinations.
.TP
.B \-noasm
Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting\-patches/
http://www.webmproject.org/code/contribute/submitting-patches/
.SH EXAMPLES
cwebp \-q 50 -lossless picture.png \-o picture_lossless.webp
@ -289,21 +244,18 @@ cwebp \-q 50 -lossless picture.png \-o picture_lossless.webp
cwebp \-q 70 picture_with_alpha.png \-o picture_with_alpha.webp
.br
cwebp \-sns 70 \-f 50 \-size 60000 picture.png \-o picture.webp
.br
cwebp \-o picture.webp \-\- \-\-\-picture.png
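Programmatically, the options above map onto fields of the WebPConfig structure in the C encoding API. A rough sketch follows; the RGBA buffer, its loader and the chosen numbers are illustrative, not part of this page.

    #include <stdlib.h>
    #include "webp/encode.h"

    // Sketch: encode a caller-provided RGBA buffer roughly the way
    // "cwebp -q 70 -m 4 -size 60000" would. The caller owns 'rgba'.
    static size_t EncodeRGBA(const uint8_t* rgba, int width, int height,
                             uint8_t** out_data) {
      WebPConfig config;
      WebPPicture pic;
      WebPMemoryWriter writer;
      size_t out_size = 0;

      if (!WebPConfigInit(&config) || !WebPPictureInit(&pic)) return 0;
      config.quality = 70;         // -q 70
      config.method = 4;           // -m 4
      config.target_size = 60000;  // -size 60000 (bytes)
      config.pass = 6;             // -pass 6: passes used by the size search
      // config.lossless = 1;      // -lossless would switch modes entirely
      if (!WebPValidateConfig(&config)) return 0;

      pic.width = width;
      pic.height = height;
      pic.use_argb = 1;
      if (!WebPPictureImportRGBA(&pic, rgba, width * 4)) return 0;

      WebPMemoryWriterInit(&writer);
      pic.writer = WebPMemoryWrite;
      pic.custom_ptr = &writer;
      if (WebPEncode(&config, &pic)) {
        *out_data = writer.mem;   // caller releases with free()
        out_size = writer.size;
      }
      WebPPictureFree(&pic);
      return out_size;
    }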
.SH AUTHORS
\fBcwebp\fP is a part of libwebp and was written by the WebP team.
\fBcwebp\fP was written by the WebP team.
.br
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
The latest source tree is available at http://www.webmproject.org/code
.PP
This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
for the Debian project (and may be used by others).
.SH SEE ALSO
.BR dwebp (1),
.BR gif2webp (1)
.BR gif2webp (1).
.br
Please refer to http://developers.google.com/speed/webp/ for additional
information.
@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH DWEBP 1 "December 11, 2015"
.TH DWEBP 1 "February 01, 2013"
.SH NAME
dwebp \- decompress a WebP file to an image file
.SH SYNOPSIS
@ -23,19 +23,6 @@ Print the version number (as major.minor.revision) and exit.
.TP
.BI \-o " string
Specify the name of the output file (as PNG format by default).
Using "-" as output name will direct output to 'stdout'.
.TP
.BI \-\- " string
Explicitly specify the input file. This option is useful if the input
file starts with an '\-' for instance. This option must appear \fBlast\fP.
Any other options afterward will be ignored. If the input file is "\-",
the data will be read from \fIstdin\fP instead of a file.
.TP
.B \-bmp
Change the output format to uncompressed BMP.
.TP
.B \-tiff
Change the output format to uncompressed TIFF.
.TP
.B \-pam
Change the output format to PAM (retains alpha).
@ -45,7 +32,7 @@ Change the output format to PPM (discards alpha).
.TP
.B \-pgm
Change the output format to PGM. The output consists of luma/chroma
samples instead of RGB, using the IMC4 layout. This option is mainly
samples instead of RGB, using the ICM4 layout. This option is mainly
for verification and debugging purposes.
.TP
.B \-yuv
@ -61,20 +48,7 @@ edges (especially the red ones), but should be faster.
.B \-nofilter
Don't use the in-loop filtering process even if it is required by
the bitstream. This may produce visible blocks on the non-compliant output,
but it will make the decoding faster.
.TP
.BI \-dither " strength
Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a
post-processing effect applied to chroma components in lossy compression.
It helps by smoothing gradients and avoiding banding artifacts.
.TP
.BI \-alpha_dither
If the compressed file contains a transparency plane that was quantized
during compression, this flag will allow dithering the reconstructed plane
in order to generate smoother transparency gradients.
.TP
.B \-nodither
Disable all dithering (default).
but will make the decoding faster.
.TP
.B \-mt
Use multi-threading for decoding, if possible.
@ -87,19 +61,11 @@ The top-left corner will be snapped to even coordinates if needed.
This option is meant to reduce the memory needed for cropping large images.
Note: the cropping is applied \fIbefore\fP any scaling.
.TP
.B \-flip
Flip decoded image vertically (can be useful for OpenGL textures for instance).
.TP
\fB\-resize\fR, \fB\-scale\fI width height\fR
.BI \-scale " width height
Rescale the decoded picture to dimension \fBwidth\fP x \fBheight\fP. This
option is mostly intended to reduce the memory needed to decode large images,
when only a small version is needed (thumbnail, preview, etc.). Note: scaling
when only a small version is needed (thumbnail, preview, etc.). Note: scaling
is applied \fIafter\fP cropping.
If either (but not both) of the \fBwidth\fP or \fBheight\fP parameters is 0,
the value will be calculated preserving the aspect-ratio.
.TP
.B \-quiet
Do not print anything.
.TP
.B \-v
Print extra information (decoding time in particular).
@ -109,7 +75,7 @@ Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/
@ -118,24 +84,19 @@ http://www.webmproject.org/code/contribute/submitting-patches/
dwebp picture.webp \-o output.png
.br
dwebp picture.webp \-ppm \-o output.ppm
.br
dwebp \-o output.ppm \-\- \-\-\-picture.webp
.br
cat picture.webp | dwebp \-o \- \-\- \- > output.ppm
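The \-crop and \-scale options correspond to the decoder-options structure in the C API. A minimal sketch, assuming the WebP payload is already in memory and using arbitrary sizes:

    #include <stdio.h>
    #include "webp/decode.h"

    // Sketch: equivalent of "dwebp -crop 0 0 256 256 -scale 64 64".
    // Cropping is applied before scaling, as described above.
    static int DecodeThumbnail(const uint8_t* data, size_t data_size) {
      WebPDecoderConfig config;
      if (!WebPInitDecoderConfig(&config)) return 0;

      config.options.use_cropping = 1;
      config.options.crop_left = 0;
      config.options.crop_top = 0;
      config.options.crop_width = 256;
      config.options.crop_height = 256;
      config.options.use_scaling = 1;
      config.options.scaled_width = 64;
      config.options.scaled_height = 64;
      config.output.colorspace = MODE_RGBA;

      if (WebPDecode(data, data_size, &config) != VP8_STATUS_OK) return 0;
      printf("decoded %d x %d\n", config.output.width, config.output.height);
      WebPFreeDecBuffer(&config.output);  // release the decoder-owned buffer
      return 1;
    }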
.SH AUTHORS
\fBdwebp\fP is a part of libwebp and was written by the WebP team.
\fBdwebp\fP was written by the WebP team.
.br
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
The latest source tree is available at http://www.webmproject.org/code
.PP
This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
for the Debian project (and may be used by others).
.SH SEE ALSO
.BR cwebp (1),
.BR gif2webp (1),
.BR webpmux (1)
.BR webpmux (1),
.BR gif2webp (1).
.br
Please refer to http://developers.google.com/speed/webp/ for additional
information.
@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH GIF2WEBP 1 "December 11, 2015"
.TH GIF2WEBP 1 "February 01, 2013"
.SH NAME
gif2webp \- Convert a GIF image to WebP
.SH SYNOPSIS
@ -18,7 +18,6 @@ The basic options are:
.BI \-o " string
Specify the name of the output WebP file. If omitted, \fBgif2webp\fP will
perform conversion but only report statistics.
Using "\-" as output name will direct output to 'stdout'.
.TP
.B \-h, \-help
Usage information.
@ -29,10 +28,6 @@ Print the version number (as major.minor.revision) and exit.
.B \-lossy
Encode the image using lossy compression.
.TP
.B \-mixed
Mixed compression mode: optimize compression of the image by picking either
lossy or lossless compression for each frame heuristically.
.TP
.BI \-q " float
Specify the compression factor for RGB channels between 0 and 100. The default
is 75.
@ -54,50 +49,6 @@ additional encoding possibilities and decide on the quality gain.
Lower value can result in faster processing time at the expense of
larger file size and lower compression quality.
.TP
.BI \-min_size
Encode image to achieve smallest size. This disables key frame insertion and
picks the dispose method resulting in smallest output for each frame. It uses
lossless compression by default, but can be combined with \-q, \-m, \-lossy or
\-mixed options.
.TP
.BI \-kmin " int
.TP
.BI \-kmax " int
Specify the minimum and maximum distance between consecutive key frames
(independently decodable frames) in the output animation. The tool will insert
some key frames into the output animation as needed so that this criterion is
satisfied.
.br
A 'kmin' value of 0 will turn off insertion of key frames. A 'kmax' value of 0
will result in all frames being key frames.
Typical values are in the range 3 to 30. Default values are kmin = 9,
kmax = 17 for lossless compression and kmin = 3, kmax = 5 for lossy compression.
.br
These two options are relevant only for animated images with large number of
frames (>50).
.br
When lower values are used, more frames will be converted to key frames. This
may lead to smaller number of frames required to decode a frame on average,
thereby improving the decoding performance. But this may lead to slightly bigger
file sizes.
Higher values may lead to worse decoding performance, but smaller file sizes.
.br
Some restrictions:
.br
(i) kmin < kmax,
.br
(ii) kmin >= kmax / 2 + 1 and
.br
(iii) kmax - kmin <= 30.
.br
If any of these restrictions are not met, they will be enforced automatically.
.TP
.BI \-metadata " string
A comma separated list of metadata to copy from the input to the output if
present.
Valid values: \fBall\fP, \fBnone\fP, \fBicc\fP, \fBxmp\fP.
The default is \fBxmp\fP.
.TP
.BI \-f " int
For lossy encoding only (specified by the \-lossy option). Specify the strength
of the deblocking filter, between 0 (no filtering) and 100 (maximum filtering).
@ -106,10 +57,6 @@ strength of the filtering process applied after decoding the picture. The higher
the value the smoother the picture will appear. Typical values are usually in
the range of 20 to 50.
.TP
.B \-mt
Use multi-threading for encoding, if possible. This option is only effective
when using lossy compression.
.TP
.B \-v
Print extra information.
.TP
@ -118,7 +65,7 @@ Do not print anything.
.SH BUGS
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/
@ -131,22 +78,19 @@ gif2webp \-q 70 picture.gif \-o picture.webp
gif2webp \-lossy \-m 3 picture.gif \-o picture_lossy.webp
.br
gif2webp \-lossy \-f 50 picture.gif \-o picture.webp
.br
gif2webp \-q 70 \-o picture.webp \-\- \-\-\-picture.gif
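On the newer side of this comparison (where src/mux/anim_encode.o exists), the \-kmin/\-kmax, \-lossy and \-q behaviour described above is exposed through the animation encoder options. A rough sketch, with GIF parsing and frame construction left out and supplied by the caller:

    #include "webp/encode.h"
    #include "webp/mux.h"

    // Sketch: the key-frame options map onto WebPAnimEncoderOptions.
    // The caller supplies ready WebPPicture frames and their start
    // timestamps (in milliseconds).
    static int AssembleAnimation(int width, int height,
                                 WebPPicture* frames, const int* timestamps_ms,
                                 int frame_count, int end_timestamp_ms,
                                 WebPData* out_webp) {
      WebPAnimEncoderOptions enc_options;
      WebPConfig config;
      WebPAnimEncoder* enc;
      int i, ok;

      if (!WebPAnimEncoderOptionsInit(&enc_options)) return 0;
      if (!WebPConfigInit(&config)) return 0;
      enc_options.kmin = 3;     // -kmin (lossy defaults: kmin=3, kmax=5)
      enc_options.kmax = 5;     // -kmax
      config.lossless = 0;      // -lossy
      config.quality = 70;      // -q 70

      enc = WebPAnimEncoderNew(width, height, &enc_options);
      if (enc == NULL) return 0;
      ok = 1;
      for (i = 0; ok && i < frame_count; ++i) {
        ok = WebPAnimEncoderAdd(enc, &frames[i], timestamps_ms[i], &config);
      }
      // A final NULL frame flushes the encoder before assembling the file.
      ok = ok && WebPAnimEncoderAdd(enc, NULL, end_timestamp_ms, NULL);
      ok = ok && WebPAnimEncoderAssemble(enc, out_webp);
      WebPAnimEncoderDelete(enc);
      return ok;   // out_webp is released with WebPDataClear()
    }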
.SH AUTHORS
\fBgif2webp\fP is a part of libwebp and was written by the WebP team.
\fBgif2webp\fP was written by the WebP team.
.br
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
The latest source tree is available at http://www.webmproject.org/code
.PP
This manual page was written by Urvang Joshi <urvang@google.com>, for the
Debian project (and may be used by others).
.SH SEE ALSO
.BR cwebp (1),
.BR dwebp (1),
.BR webpmux (1)
.BR cwebp (1),
.BR webpmux (1).
.br
Please refer to http://developers.google.com/speed/webp/ for additional
information.
@ -1,91 +0,0 @@
.\" Hey, EMACS: -*- nroff -*-
.TH VWEBP 1 "December 11, 2015"
.SH NAME
vwebp \- decompress a WebP file and display it in a window
.SH SYNOPSIS
.B vwebp
.RI [ options ] " input_file.webp
.br
.SH DESCRIPTION
This manual page documents the
.B vwebp
command.
.PP
\fBvwebp\fP decompresses a WebP file and displays it in a window using OpenGL.
.SH OPTIONS
.TP
.B \-h
Print usage summary.
.TP
.B \-version
Print version number and exit.
.TP
.B \-noicc
Don't use the ICC profile if present.
.TP
.B \-nofancy
Don't use the fancy YUV420 upscaler.
.TP
.B \-nofilter
Disable in-loop filtering.
.TP
.BI \-dither " strength
Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a
post-processing effect applied to chroma components in lossy compression.
It helps by smoothing gradients and avoiding banding artifacts. Default: 50.
.TP
.BI \-noalphadither
By default, quantized transparency planes are dithered during decompression,
to smooth the gradients. This flag will prevent this dithering.
.TP
.B \-mt
Use multi-threading for decoding, if possible.
.TP
.B \-info
Display image information on top of the decoded image.
.TP
.BI \-\- " string
Explicitly specify the input file. This option is useful if the input
file starts with an '\-' for instance. This option must appear \fBlast\fP.
Any other options afterward will be ignored. If the input file is "\-",
the data will be read from \fIstdin\fP instead of a file.
.TP
.SH KEYBOARD SHORTCUTS
.TP
.B 'c'
Toggle use of color profile.
.TP
.B 'i'
Overlay file information.
.TP
.B 'q' / 'Q' / ESC
Quit.
.SH BUGS
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/
.SH EXAMPLES
vwebp picture.webp
.br
vwebp picture.webp -mt -dither 0
.br
vwebp \-\- \-\-\-picture.webp
.SH AUTHORS
\fBvwebp\fP is a part of libwebp and was written by the WebP team.
.br
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
.PP
This manual page was written for the Debian project (and may be used by others).
.SH SEE ALSO
.BR dwebp (1)
.br
Please refer to http://developers.google.com/speed/webp/ for additional
information.
@ -1,8 +1,7 @@
.\" Hey, EMACS: -*- nroff -*-
.TH WEBPMUX 1 "December 11, 2015"
.TH WEBPMUX 1 "March 16, 2013"
.SH NAME
webpmux \- create animated WebP files from non\-animated WebP images, extract
frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
webpmux \- command line tool to create WebP Mux/container file.
.SH SYNOPSIS
.B webpmux \-get
.I GET_OPTIONS
@ -46,8 +45,8 @@ This manual page documents the
.B webpmux
command.
.PP
\fBwebpmux\fP can be used to create/extract from animated WebP files, as well as
to add/extract/strip XMP/EXIF metadata and ICC profile.
\fBwebpmux\fP can be used to create a WebP container file
and extract/strip relevant data from the container file.
.SH OPTIONS
.SS GET_OPTIONS (\-get):
.TP
@ -61,7 +60,7 @@ Get EXIF metadata.
Get XMP metadata.
.TP
.BI frame " n
Get nth frame from an animated image. (n = 0 has a special meaning: last frame).
Get nth frame.
.SS SET_OPTIONS (\-set)
.TP
@ -92,16 +91,13 @@ Strip EXIF metadata.
Strip XMP metadata.
.SS FRAME_OPTIONS (\-frame)
Create an animated WebP file from multiple (non\-animated) WebP images.
.TP
.I file_i +di[+xi+yi[+mi[bi]]]
.I file_i +di[+xi+yi[+mi]]
Where: 'file_i' is the i'th frame (WebP format), 'xi','yi' specify the image
offset for this frame, 'di' is the pause duration before next frame, 'mi' is
the dispose method for this frame (0 for NONE or 1 for BACKGROUND) and 'bi' is
the blending method for this frame (+b for BLEND or \-b for NO_BLEND).
Argument 'bi' can be omitted and will default to +b (BLEND).
Also, 'mi' can be omitted if 'bi' is omitted and will default to 0 (NONE).
Finally, if 'mi' and 'bi' are omitted then 'xi' and 'yi' can be omitted and will
offset for this frame, 'di' is the pause duration before next frame and 'mi' is
the dispose method for this frame (0 for NONE or 1 for BACKGROUND).
'mi' can be omitted and will default to 0 (NONE).
Additionally, if 'mi' is omitted then 'xi' and 'yi' can be omitted and will
default to +0+0.
.TP
.BI \-loop " n
@ -129,84 +125,50 @@ The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.
.SH BUGS
Please report all bugs to our issue tracker:
https://bugs.chromium.org/p/webp
http://code.google.com/p/webp/issues
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting\-patches/
http://www.webmproject.org/code/contribute/submitting-patches/
.SH EXAMPLES
.P
Add ICC profile:
.br
webpmux \-set icc image_profile.icc in.webp \-o icc_container.webp
.P
Extract ICC profile:
.br
webpmux \-get icc icc_container.webp \-o image_profile.icc
.P
Strip ICC profile:
.br
webpmux \-strip icc icc_container.webp \-o without_icc.webp
.P
Add XMP metadata:
.br
webpmux \-set xmp image_metadata.xmp in.webp \-o xmp_container.webp
.P
Extract XMP metadata:
.br
webpmux \-get xmp xmp_container.webp \-o image_metadata.xmp
.P
Strip XMP metadata:
.br
webpmux \-strip xmp xmp_container.webp \-o without_xmp.webp
.P
Add EXIF metadata:
.br
webpmux \-set exif image_metadata.exif in.webp \-o exif_container.webp
.P
Extract EXIF metadata:
.br
webpmux \-get exif exif_container.webp \-o image_metadata.exif
.P
Strip EXIF metadata:
.br
webpmux \-strip exif exif_container.webp \-o without_exif.webp
.P
Create an animated WebP file from 3 (non\-animated) WebP images:
.br
webpmux \-frame 1.webp +100 \-frame 2.webp +100+50+50
webpmux \-frame anim_1.webp +100 \-frame anim_2.webp +100+50+50 \-loop 10
.br
.RS 8
\-frame 3.webp +100+50+50+1+b \-loop 10 \-bgcolor 255,255,255,255
.br
\-o anim_container.webp
\-bgcolor 255,255,255,255 \-o anim_container.webp
.RE
.P
Get the 2nd frame from an animated WebP file:
.br
webpmux \-get frame 2 anim_container.webp \-o frame_2.webp
.P
Using \-get/\-set/\-strip with input file name starting with '\-':
.br
webpmux \-set icc image_profile.icc \-o icc_container.webp \-\- \-\-\-in.webp
.br
webpmux \-get icc \-o image_profile.icc \-\- \-\-\-icc_container.webp
.br
webpmux \-strip icc \-o without_icc.webp \-\- \-\-\-icc_container.webp
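In the C API, one \-frame argument corresponds to one WebPMuxFrameInfo pushed into the mux object. A sketch of the newer-side form with dispose and blend methods; the frame payload is assumed to be an already-encoded, non-animated WebP bitstream:

    #include <string.h>
    #include "webp/mux.h"

    // Sketch: what "-frame frame.webp +100+50+50+1+b" amounts to.
    static int PushOneFrame(WebPMux* mux,
                            const uint8_t* frame_webp, size_t frame_size) {
      WebPMuxFrameInfo info;
      memset(&info, 0, sizeof(info));
      info.id = WEBP_CHUNK_ANMF;
      info.bitstream.bytes = frame_webp;
      info.bitstream.size = frame_size;
      info.duration = 100;                                // 'di'
      info.x_offset = 50;                                 // 'xi'
      info.y_offset = 50;                                 // 'yi'
      info.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;  // 'mi' = 1
      info.blend_method = WEBP_MUX_BLEND;                 // 'bi' = +b
      return WebPMuxPushFrame(mux, &info, 1 /* copy_data */) == WEBP_MUX_OK;
    }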
.SH AUTHORS
\fBwebpmux\fP is a part of libwebp and was written by the WebP team.
\fBwebpmux\fP is written by the WebP team.
.br
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
The latest source tree is available at http://www.webmproject.org/code
.PP
This manual page was written by Vikas Arora <vikaas.arora@gmail.com>,
for the Debian project (and may be used by others).
.SH SEE ALSO
.BR cwebp (1),
.BR dwebp (1),
.BR gif2webp (1)
.BR cwebp (1),
.BR gif2webp (1).
.br
Please refer to http://developers.google.com/speed/webp/ for additional
src/.gitignore vendored Normal file
@ -0,0 +1 @@
/*.pc
@ -1,5 +1,5 @@
# The mux, demux and extras libraries depend on libwebp, thus the '.' to force
# the build order so it's available to them.
# The mux and demux libraries depend on libwebp, thus the '.' to force the
# build order so it's available to them.
SUBDIRS = dec enc dsp utils .
if WANT_MUX
SUBDIRS += mux
@ -7,10 +7,8 @@ endif
if WANT_DEMUX
SUBDIRS += demux
endif
if WANT_EXTRAS
SUBDIRS += extras
endif
AM_CPPFLAGS = -I$(top_srcdir)/src
lib_LTLIBRARIES = libwebp.la
if BUILD_LIBWEBPDECODER
@ -38,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
# other than the ones listed on the command line, i.e., after linking, it will
# not have unresolved symbols. Some platforms (Windows among them) require all
# symbols in shared libraries to be resolved at library creation.
libwebp_la_LDFLAGS = -no-undefined -version-info 6:0:0
libwebp_la_LDFLAGS = -no-undefined -version-info 4:3:0
libwebpincludedir = $(includedir)/webp
pkgconfig_DATA = libwebp.pc
@ -50,7 +48,7 @@ if BUILD_LIBWEBPDECODER
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 2:0:0
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 0:1:0
pkgconfig_DATA += libwebpdecoder.pc
endif
@ -1,14 +1,14 @@
AM_CPPFLAGS = -I$(top_srcdir)/src
noinst_LTLIBRARIES = libwebpdecode.la
libwebpdecode_la_SOURCES =
libwebpdecode_la_SOURCES += alpha.c
libwebpdecode_la_SOURCES += alphai.h
libwebpdecode_la_SOURCES += buffer.c
libwebpdecode_la_SOURCES += common.h
libwebpdecode_la_SOURCES += decode_vp8.h
libwebpdecode_la_SOURCES += frame.c
libwebpdecode_la_SOURCES += idec.c
libwebpdecode_la_SOURCES += io.c
libwebpdecode_la_SOURCES += layer.c
libwebpdecode_la_SOURCES += quant.c
libwebpdecode_la_SOURCES += tree.c
libwebpdecode_la_SOURCES += vp8.c
@ -24,5 +24,5 @@ libwebpdecodeinclude_HEADERS += ../webp/types.h
noinst_HEADERS =
noinst_HEADERS += ../webp/format_constants.h
libwebpdecode_la_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
libwebpdecode_la_CPPFLAGS = $(USE_EXPERIMENTAL_CODE)
libwebpdecodeincludedir = $(includedir)/webp
@ -12,156 +12,104 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "./alphai.h"
#include "./vp8i.h"
#include "./vp8li.h"
#include "../dsp/dsp.h"
#include "../utils/filters.h"
#include "../utils/quant_levels_dec.h"
#include "../utils/utils.h"
#include "../webp/format_constants.h"
//------------------------------------------------------------------------------
// ALPHDecoder object.
ALPHDecoder* ALPHNew(void) {
ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
return dec;
}
void ALPHDelete(ALPHDecoder* const dec) {
if (dec != NULL) {
VP8LDelete(dec->vp8l_dec_);
dec->vp8l_dec_ = NULL;
WebPSafeFree(dec);
}
}
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// Decoding.
// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
// The 'output' buffer should be pre-allocated and must be of the same
// dimension 'height'x'width', as that of the image.
//
// Returns 1 on successfully decoding the compressed alpha and
// 0 if either:
// error in bit-stream header (invalid compression mode or filter), or
// error returned by appropriate compression method.
// Initialize alpha decoding by parsing the alpha header and decoding the image
// header for alpha data stored using lossless compression.
// Returns false in case of error in alpha header (data too short, invalid
// compression method or filter, error in lossless header data etc).
static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
size_t data_size, int width, int height, uint8_t* output) {
static int DecodeAlpha(const uint8_t* data, size_t data_size,
int width, int height, uint8_t* output) {
WEBP_FILTER_TYPE filter;
int pre_processing;
int rsrv;
int ok = 0;
int method;
const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
int rsrv;
assert(width > 0 && height > 0);
assert(data != NULL && output != NULL);
dec->width_ = width;
dec->height_ = height;
if (data_size <= ALPHA_HEADER_LEN) {
return 0;
}
dec->method_ = (data[0] >> 0) & 0x03;
dec->filter_ = (data[0] >> 2) & 0x03;
dec->pre_processing_ = (data[0] >> 4) & 0x03;
method = (data[0] >> 0) & 0x03;
filter = (data[0] >> 2) & 0x03;
pre_processing = (data[0] >> 4) & 0x03;
rsrv = (data[0] >> 6) & 0x03;
if (dec->method_ < ALPHA_NO_COMPRESSION ||
dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
dec->filter_ >= WEBP_FILTER_LAST ||
dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
if (method < ALPHA_NO_COMPRESSION ||
method > ALPHA_LOSSLESS_COMPRESSION ||
filter >= WEBP_FILTER_LAST ||
pre_processing > ALPHA_PREPROCESSED_LEVELS ||
rsrv != 0) {
return 0;
}
if (dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = dec->width_ * dec->height_;
if (method == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = height * width;
ok = (alpha_data_size >= alpha_decoded_size);
if (ok) memcpy(output, alpha_data, alpha_decoded_size);
} else {
assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
ok = VP8LDecodeAlphaImageStream(width, height, alpha_data, alpha_data_size,
output);
}
VP8FiltersInit();
return ok;
}
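Both the incremental path (ALPHInit) and the one-shot path (DecodeAlpha) parse the same single header byte: two bits each for compression method, filter, pre-processing and a reserved field that must be zero. In isolation the layout is simply the following (the struct and names are ad hoc illustrations, not library types):

    #include <stdint.h>

    typedef struct {
      int method;          // 0 = uncompressed, 1 = WebP lossless
      int filter;          // one of the WEBP_FILTER_* predictive filters
      int pre_processing;  // ALPHA_PREPROCESSED_LEVELS or 0
      int reserved;        // must be zero for a valid header
    } AlphaHeader;         // ad hoc illustration type

    static int ParseAlphaHeaderByte(uint8_t b, AlphaHeader* const hdr) {
      hdr->method = (b >> 0) & 0x03;
      hdr->filter = (b >> 2) & 0x03;
      hdr->pre_processing = (b >> 4) & 0x03;
      hdr->reserved = (b >> 6) & 0x03;
      return (hdr->reserved == 0);
    }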
// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
// starting from row number 'row'. It assumes that rows up to (row - 1) have
// already been decoded.
// Returns false in case of bitstream error.
static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
const int width = alph_dec->width_;
const int height = alph_dec->height_;
WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];
uint8_t* const output = dec->alpha_plane_;
if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t offset = row * width;
const size_t num_pixels = num_rows * width;
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);
memcpy(dec->alpha_plane_ + offset,
dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);
} else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
assert(alph_dec->vp8l_dec_ != NULL);
if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
return 0;
if (ok) {
WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
if (unfilter_func != NULL) {
// TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
// and apply filter per image-row.
unfilter_func(width, height, width, output);
}
if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
ok = DequantizeLevels(output, width, height);
}
}
if (unfilter_func != NULL) {
unfilter_func(width, height, width, row, num_rows, output);
}
if (row + num_rows == dec->pic_hdr_.height_) {
dec->is_alpha_decoded_ = 1;
}
return 1;
return ok;
}
//------------------------------------------------------------------------------
// Main entry point.
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
int row, int num_rows) {
const int width = dec->pic_hdr_.width_;
const int height = dec->pic_hdr_.height_;
if (row < 0 || num_rows <= 0 || row + num_rows > height) {
if (row < 0 || num_rows < 0 || row + num_rows > height) {
return NULL; // sanity check.
}
if (row == 0) {
// Initialize decoding.
assert(dec->alpha_plane_ != NULL);
dec->alph_dec_ = ALPHNew();
if (dec->alph_dec_ == NULL) return NULL;
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
width, height, dec->alpha_plane_)) {
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
return NULL;
// Decode everything during the first call.
assert(!dec->is_alpha_decoded_);
if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
width, height, dec->alpha_plane_)) {
return NULL; // Error.
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering_ = 0; // disable dithering
} else {
num_rows = height; // decode everything in one pass
}
}
if (!dec->is_alpha_decoded_) {
int ok = 0;
assert(dec->alph_dec_ != NULL);
ok = ALPHDecode(dec, row, num_rows);
if (ok && dec->alpha_dithering_ > 0) {
ok = WebPDequantizeLevels(dec->alpha_plane_, width, height,
dec->alpha_dithering_);
}
if (!ok || dec->is_alpha_decoded_) {
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
}
if (!ok) return NULL; // Error.
dec->is_alpha_decoded_ = 1;
}
// Return a pointer to the current decoded row.
return dec->alpha_plane_ + row * width;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
@ -1,55 +0,0 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Alpha decoder: internal header.
//
// Author: Urvang (urvang@google.com)
#ifndef WEBP_DEC_ALPHAI_H_
#define WEBP_DEC_ALPHAI_H_
#include "./webpi.h"
#include "../utils/filters.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP8LDecoder; // Defined in dec/vp8li.h.
typedef struct ALPHDecoder ALPHDecoder;
struct ALPHDecoder {
int width_;
int height_;
int method_;
WEBP_FILTER_TYPE filter_;
int pre_processing_;
struct VP8LDecoder* vp8l_dec_;
VP8Io io_;
int use_8b_decode; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// Allocates a new alpha decoder instance.
ALPHDecoder* ALPHNew(void);
// Clears and deallocates an alpha decoder instance.
void ALPHDelete(ALPHDecoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_ALPHAI_H_ */
@ -17,6 +17,10 @@
#include "./webpi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// WebPDecBuffer
@ -33,11 +37,6 @@ static int IsValidColorspace(int webp_csp_mode) {
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
}
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
const WEBP_CSP_MODE mode = buffer->colorspace;
@ -47,41 +46,33 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
ok = 0;
} else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int y_stride = abs(buf->y_stride);
const int u_stride = abs(buf->u_stride);
const int v_stride = abs(buf->v_stride);
const int a_stride = abs(buf->a_stride);
const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
const uint64_t y_size = (uint64_t)buf->y_stride * height;
const uint64_t u_size = (uint64_t)buf->u_stride * ((height + 1) / 2);
const uint64_t v_size = (uint64_t)buf->v_stride * ((height + 1) / 2);
const uint64_t a_size = (uint64_t)buf->a_stride * height;
ok &= (y_size <= buf->y_size);
ok &= (u_size <= buf->u_size);
ok &= (v_size <= buf->v_size);
ok &= (y_stride >= width);
ok &= (u_stride >= uv_width);
ok &= (v_stride >= uv_width);
ok &= (buf->y_stride >= width);
ok &= (buf->u_stride >= (width + 1) / 2);
ok &= (buf->v_stride >= (width + 1) / 2);
ok &= (buf->y != NULL);
ok &= (buf->u != NULL);
ok &= (buf->v != NULL);
if (mode == MODE_YUVA) {
ok &= (a_stride >= width);
ok &= (buf->a_stride >= width);
ok &= (a_size <= buf->a_size);
ok &= (buf->a != NULL);
}
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
const uint64_t size = (uint64_t)buf->stride * height;
ok &= (size <= buf->size);
ok &= (stride >= width * kModeBpp[mode]);
ok &= (buf->stride >= width * kModeBpp[mode]);
ok &= (buf->rgba != NULL);
}
return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
}
#undef MIN_BUFFER_SIZE
static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
const int w = buffer->width;
@ -144,35 +135,9 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
return CheckDecBuffer(buffer);
}
VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
if (buffer == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
if (WebPIsRGBMode(buffer->colorspace)) {
WebPRGBABuffer* const buf = &buffer->u.RGBA;
buf->rgba += (buffer->height - 1) * buf->stride;
buf->stride = -buf->stride;
} else {
WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int H = buffer->height;
buf->y += (H - 1) * buf->y_stride;
buf->y_stride = -buf->y_stride;
buf->u += ((H - 1) >> 1) * buf->u_stride;
buf->u_stride = -buf->u_stride;
buf->v += ((H - 1) >> 1) * buf->v_stride;
buf->v_stride = -buf->v_stride;
if (buf->a != NULL) {
buf->a += (H - 1) * buf->a_stride;
buf->a_stride = -buf->a_stride;
}
}
return VP8_STATUS_OK;
}
VP8StatusCode WebPAllocateDecBuffer(int w, int h,
const WebPDecoderOptions* const options,
WebPDecBuffer* const out) {
VP8StatusCode status;
if (out == NULL || w <= 0 || h <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
@ -189,28 +154,18 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
h = ch;
}
if (options->use_scaling) {
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(
w, h, &scaled_width, &scaled_height)) {
if (options->scaled_width <= 0 || options->scaled_height <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
w = scaled_width;
h = scaled_height;
w = options->scaled_width;
h = options->scaled_height;
}
}
out->width = w;
out->height = h;
// Then, allocate buffer for real.
status = AllocateBuffer(out);
if (status != VP8_STATUS_OK) return status;
// Use the stride trick if vertical flip is needed.
if (options != NULL && options->flip) {
status = WebPFlipBuffer(out);
}
return status;
// Then, allocate buffer for real
return AllocateBuffer(out);
}
//------------------------------------------------------------------------------
@ -227,9 +182,8 @@ int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
if (buffer != NULL) {
if (!buffer->is_external_memory) {
WebPSafeFree(buffer->private_memory);
}
if (!buffer->is_external_memory)
free(buffer->private_memory);
buffer->private_memory = NULL;
}
}
@ -258,3 +212,6 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
@ -1,54 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Definitions and macros common to encoding and decoding
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_COMMON_H_
#define WEBP_DEC_COMMON_H_
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
B_VE_PRED = 2,
B_HE_PRED = 3,
B_RD_PRED = 4,
B_VR_PRED = 5,
B_LD_PRED = 6,
B_VL_PRED = 7,
B_HD_PRED = 8,
B_HU_PRED = 9,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
B_PRED = NUM_BMODES, // refined I4x4 mode
NUM_PRED_MODES = 4,
// special modes
B_DC_PRED_NOTOP = 4,
B_DC_PRED_NOLEFT = 5,
B_DC_PRED_NOTOPLEFT = 6,
NUM_B_DC_MODES = 7 };
enum { MB_FEATURE_TREE_PROBS = 3,
NUM_MB_SEGMENTS = 4,
NUM_REF_LF_DELTAS = 4,
NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
MAX_NUM_PARTITIONS = 8,
// Probabilities
NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11
};
#endif // WEBP_DEC_COMMON_H_
@ -16,7 +16,7 @@
#include "../webp/decode.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -132,8 +132,7 @@ static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
}
// Decode the VP8 frame header. Returns true if ok.
// Note: 'io->data' must be pointing to the start of the VP8 frame header.
// Start decoding a new picture. Returns true if ok.
int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
@ -178,7 +177,7 @@ WEBP_EXTERN(int) VP8LGetInfo(
const uint8_t* data, size_t data_size, // data available so far
int* const width, int* const height, int* const has_alpha);
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
@ -15,180 +15,11 @@
#include "./vp8i.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
// Main reconstruction function.
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const int kScan[16] = {
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
static int CheckMode(int mb_x, int mb_y, int mode) {
if (mode == B_DC_PRED) {
if (mb_x == 0) {
return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
} else {
return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
}
}
return mode;
}
static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
memcpy(dst, src, 4);
}
static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
uint8_t* const dst) {
switch (bits >> 30) {
case 3:
VP8Transform(src, dst, 0);
break;
case 2:
VP8TransformAC3(src, dst);
break;
case 1:
VP8TransformDC(src, dst);
break;
default:
break;
}
}
static void DoUVTransform(uint32_t bits, const int16_t* const src,
uint8_t* const dst) {
if (bits & 0xff) { // any non-zero coeff at all?
if (bits & 0xaa) { // any non-zero AC coefficient?
VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
} else {
VP8TransformDCUV(src, dst);
}
}
}
static void ReconstructRow(const VP8Decoder* const dec,
const VP8ThreadContext* ctx) {
int j;
int mb_x;
const int mb_y = ctx->mb_y_;
const int cache_id = ctx->id_;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
// Initialize left-most block.
for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129;
}
for (j = 0; j < 8; ++j) {
u_dst[j * BPS - 1] = 129;
v_dst[j * BPS - 1] = 129;
}
// Init top-left sample on left column too.
if (mb_y > 0) {
y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
} else {
// we only need to do this init once at block (0,0).
// Afterward, it remains valid for the whole topmost row.
memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
memset(u_dst - BPS - 1, 127, 8 + 1);
memset(v_dst - BPS - 1, 127, 8 + 1);
}
// Reconstruct one row.
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
const VP8MBData* const block = ctx->mb_data_ + mb_x;
// Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter.
if (mb_x > 0) {
for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
}
for (j = -1; j < 8; ++j) {
Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
}
}
{
// bring top samples into the cache
VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
const int16_t* const coeffs = block->coeffs_;
uint32_t bits = block->non_zero_y_;
int n;
if (mb_y > 0) {
memcpy(y_dst - BPS, top_yuv[0].y, 16);
memcpy(u_dst - BPS, top_yuv[0].u, 8);
memcpy(v_dst - BPS, top_yuv[0].v, 8);
}
// predict and add residuals
if (block->is_i4x4_) { // 4x4
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
if (mb_y > 0) {
if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
} else {
memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
}
}
// replicate the top-right pixels below
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
// predict and add residuals for all 4x4 blocks in turn.
for (n = 0; n < 16; ++n, bits <<= 2) {
uint8_t* const dst = y_dst + kScan[n];
VP8PredLuma4[block->imodes_[n]](dst);
DoTransform(bits, coeffs + n * 16, dst);
}
} else { // 16x16
const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
VP8PredLuma16[pred_func](y_dst);
if (bits != 0) {
for (n = 0; n < 16; ++n, bits <<= 2) {
DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
}
}
}
{
// Chroma
const uint32_t bits_uv = block->non_zero_uv_;
const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
VP8PredChroma8[pred_func](u_dst);
VP8PredChroma8[pred_func](v_dst);
DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
}
// stash away top samples for next block
if (mb_y < dec->mb_h_ - 1) {
memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
}
}
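// Illustrative sketch (not libwebp code): how the per-macroblock non-zero word
// drives DoTransform() above. Each 4x4 sub-block consumes the top two bits of
// 'bits' in kScan order, hence the 'bits <<= 2' in ReconstructRow()'s loops.
// The enum and function names below are made up for the example.
#include <stdint.h>
#include <stdio.h>
typedef enum { SKIP_BLOCK, DC_ONLY, AC3_ONLY, FULL_TRANSFORM } BlockAction;
static BlockAction ClassifyBlock(uint32_t bits) {
  switch (bits >> 30) {        // same selector as DoTransform()
    case 3:  return FULL_TRANSFORM;
    case 2:  return AC3_ONLY;
    case 1:  return DC_ONLY;
    default: return SKIP_BLOCK;
  }
}
int main(void) {
  uint32_t bits = 0xD0000000u;   // block #0: full transform, #1: DC only, rest skipped
  int n;
  for (n = 0; n < 16; ++n, bits <<= 2) {
    printf("block %2d -> action %d\n", n, ClassifyBlock(bits));
  }
  return 0;
}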
#define ALIGN_MASK (32 - 1)
//------------------------------------------------------------------------------
// Filtering
@ -200,18 +31,25 @@ static void ReconstructRow(const VP8Decoder* const dec,
// U/V, so it's 8 samples total (because of the 2x upsampling).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
if (keyframe) {
return (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
} else {
return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0;
}
}
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int y_bps = dec->cache_y_stride_;
const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
VP8FInfo* const f_info = ctx->f_info_ + mb_x;
uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
const int level = f_info->f_level_;
const int ilevel = f_info->f_ilevel_;
const int limit = f_info->f_limit_;
if (limit == 0) {
const int limit = 2 * level + ilevel;
if (level == 0) {
return;
}
assert(limit >= 3);
if (dec->filter_type_ == 1) { // simple
if (mb_x > 0) {
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
@ -227,9 +65,10 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
}
} else { // complex
const int uv_bps = dec->cache_uv_stride_;
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
const int hev_thresh = f_info->hev_thresh_;
uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
const int hev_thresh =
hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
if (mb_x > 0) {
VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
@ -282,120 +121,32 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += hdr->ref_lf_delta_[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
if (level > 0) {
int ilevel = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
ilevel >>= 2;
} else {
ilevel >>= 1;
}
if (ilevel > 9 - hdr->sharpness_) {
ilevel = 9 - hdr->sharpness_;
}
info->f_level_ = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
}
if (level > 9 - hdr->sharpness_) {
level = 9 - hdr->sharpness_;
}
if (ilevel < 1) ilevel = 1;
info->f_ilevel_ = ilevel;
info->f_limit_ = 2 * level + ilevel;
info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
} else {
info->f_limit_ = 0; // no filtering
}
info->f_inner_ = i4x4;
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = 0;
}
}
}
}
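// Illustrative sketch (not libwebp code): the strength mapping computed by
// PrecomputeFilterStrengths() above (keyframe case), condensed into one helper.
// Names are made up; the formulas are the ones shown in the hunk.
#include <stdio.h>
typedef struct { int level, ilevel, limit, hev_thresh; } FilterParams;
static FilterParams MakeFilterParams(int base_level, int sharpness) {
  FilterParams p;
  int level = base_level;
  int ilevel;
  if (level < 0) level = 0; else if (level > 63) level = 63;   // clamp, as above
  ilevel = level;
  if (sharpness > 0) {
    ilevel >>= (sharpness > 4) ? 2 : 1;                 // soften the inner limit
    if (ilevel > 9 - sharpness) ilevel = 9 - sharpness;
  }
  if (ilevel < 1) ilevel = 1;
  p.level = level;
  p.ilevel = ilevel;
  p.limit = 2 * level + ilevel;                          // same formula as f_limit_
  p.hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;   // keyframe thresholds
  return p;
}
int main(void) {
  const FilterParams p = MakeFilterParams(30, 3);
  printf("level=%d ilevel=%d limit=%d hev=%d\n", p.level, p.ilevel, p.limit, p.hev_thresh);
  return 0;   // prints: level=30 ilevel=6 limit=66 hev=1
}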
//------------------------------------------------------------------------------
// Dithering
#define DITHER_AMP_TAB_SIZE 12
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec) {
assert(dec != NULL);
if (options != NULL) {
const int d = options->dithering_strength;
const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
if (f > 0) {
int s;
int all_amp = 0;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8QuantMatrix* const dqm = &dec->dqm_[s];
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
// TODO(skal): should we specially dither more for uv_quant_ < 0?
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
}
all_amp |= dqm->dither_;
}
if (all_amp != 0) {
VP8InitRandom(&dec->dithering_rg_, 1.0f);
dec->dither_ = 1;
}
}
// potentially allow alpha dithering
dec->alpha_dithering_ = options->alpha_dithering_strength;
if (dec->alpha_dithering_ > 100) {
dec->alpha_dithering_ = 100;
} else if (dec->alpha_dithering_ < 0) {
dec->alpha_dithering_ = 0;
}
}
}
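// Illustrative sketch (not libwebp code): the per-segment dithering strength
// mapping from VP8InitDithering() above. VP8_RANDOM_DITHER_FIX is assumed to
// be 8 here; the amplitude table is copied from the hunk.
#include <stdio.h>
#define RANDOM_DITHER_FIX 8               // assumption: mirrors VP8_RANDOM_DITHER_FIX
#define DITHER_AMP_TAB_SIZE 12
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
// Returns the per-segment dither amplitude for strength d in [0,100].
static int DitherAmp(int d, int uv_quant) {
  const int max_amp = (1 << RANDOM_DITHER_FIX) - 1;
  const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
  const int idx = (uv_quant < 0) ? 0 : uv_quant;
  if (idx >= DITHER_AMP_TAB_SIZE) return 0;   // quantizer too high: no dithering
  return (f * kQuantToDitherAmp[idx]) >> 3;
}
int main(void) {
  printf("%d\n", DitherAmp(50, 0));   // 50% strength, strongest table entry -> 127
  return 0;
}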
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
#define DITHER_DESCALE 4
#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
#define DITHER_AMP_BITS 8
#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
// TODO: could be made faster with SSE2
const int bits =
VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
const int v = (int)dst[i] + delta;
dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
}
dst += bps;
}
}
static void DitherRow(VP8Decoder* const dec) {
int mb_x;
assert(dec->dither_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const VP8MBData* const data = ctx->mb_data_ + mb_x;
const int cache_id = ctx->id_;
const int uv_bps = dec->cache_uv_stride_;
if (data->dither_ >= MIN_DITHER_AMP) {
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
}
}
}
//------------------------------------------------------------------------------
// This function is called after a row of macroblocks is finished decoding.
// It also takes into account the following restrictions:
@ -413,35 +164,25 @@ static void DitherRow(VP8Decoder* const dec) {
static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
const int ysize = extra_y_rows * dec->cache_y_stride_;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
const int mb_y = ctx->mb_y_;
const int is_first_row = (mb_y == 0);
const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
if (dec->mt_method_ == 2) {
ReconstructRow(dec, ctx);
}
const int first_row = (ctx->mb_y_ == 0);
const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);
int y_start = MACROBLOCK_VPOS(ctx->mb_y_);
int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);
if (ctx->filter_row_) {
FilterRow(dec);
}
if (dec->dither_) {
DitherRow(dec);
}
if (io->put != NULL) {
int y_start = MACROBLOCK_VPOS(mb_y);
int y_end = MACROBLOCK_VPOS(mb_y + 1);
if (!is_first_row) {
if (io->put) {
if (!first_row) {
y_start -= extra_y_rows;
io->y = ydst;
io->u = udst;
@ -452,7 +193,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
io->v = dec->cache_v_ + uv_offset;
}
if (!is_last_row) {
if (!last_row) {
y_end -= extra_y_rows;
}
if (y_end > io->crop_bottom) {
@ -460,8 +201,11 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
}
io->a = NULL;
if (dec->alpha_data_ != NULL && y_start < y_end) {
// TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
// good idea.
// TODO(skal): several things to correct here:
// * testing presence of alpha with dec->alpha_data_ is not a good idea
// * we're actually decompressing the full plane only once. It should be
// more obvious from signature.
// * we could free alpha_data_ right after this call, but we don't own.
io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
if (io->a == NULL) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
@ -493,8 +237,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
}
}
// rotate top samples if needed
if (cache_id + 1 == dec->num_caches_) {
if (!is_last_row) {
if (ctx->id_ + 1 == dec->num_caches_) {
if (!last_row) {
memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
@ -511,40 +255,27 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int filter_row =
(dec->filter_type_ > 0) &&
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
if (dec->mt_method_ == 0) {
if (!dec->use_threads_) {
// ctx->id_ and ctx->f_info_ are already set
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
ReconstructRow(dec, ctx);
ctx->filter_row_ = dec->filter_row_;
ok = FinishRow(dec, io);
} else {
WebPWorker* const worker = &dec->worker_;
// Finish previous job *before* updating context
ok &= WebPGetWorkerInterface()->Sync(worker);
ok &= WebPWorkerSync(worker);
assert(worker->status_ == OK);
if (ok) { // spawn a new deblocking/output job
ctx->io_ = *io;
ctx->id_ = dec->cache_id_;
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
if (dec->mt_method_ == 2) { // swap macroblock data
VP8MBData* const tmp = ctx->mb_data_;
ctx->mb_data_ = dec->mb_data_;
dec->mb_data_ = tmp;
} else {
// perform reconstruction directly in main thread
ReconstructRow(dec, ctx);
}
if (filter_row) { // swap filter info
ctx->filter_row_ = dec->filter_row_;
if (ctx->filter_row_) { // just swap filter info
VP8FInfo* const tmp = ctx->f_info_;
ctx->f_info_ = dec->f_info_;
dec->f_info_ = tmp;
}
// (reconstruct)+filter in parallel
WebPGetWorkerInterface()->Launch(worker);
WebPWorkerLaunch(worker);
if (++dec->cache_id_ == dec->num_caches_) {
dec->cache_id_ = 0;
}
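// Illustrative sketch (not libwebp code): the double-buffer hand-off behind the
// f_info_ / mb_data_ swaps above. The main thread keeps filling one row buffer
// while the worker consumes the other; swapping two pointers is all that is
// needed between rows. Types and names are made up.
typedef struct { int dummy; } RowData;
typedef struct {
  RowData* main_row;     // row currently being filled by the main thread
  RowData* worker_row;   // row currently being filtered/output by the worker
} DoubleBuffer;
// Call after the worker finished its previous row and before launching it again.
static void HandOffRow(DoubleBuffer* const db) {
  RowData* const tmp = db->main_row;
  db->main_row = db->worker_row;   // main thread reuses the worker's old row
  db->worker_row = tmp;            // worker gets the freshly decoded row
}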
@ -558,8 +289,8 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// Call setup() first. This may trigger additional decoding features on 'io'.
// Note: Afterward, we must call teardown() no matter what.
if (io->setup != NULL && !io->setup(io)) {
// Note: Afterward, we must call teardown() no matter what.
if (io->setup && !io->setup(io)) {
VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
return dec->status_;
}
@ -572,7 +303,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// Define the area where we can skip in-loop filtering, in case of cropping.
//
// 'Simple' filter reads two luma samples outside of the macroblock
// 'Simple' filter reads two luma samples outside of the macroblock and
// and filters one. It doesn't filter the chroma samples. Hence, we can
// avoid doing the in-loop filtering before crop_top/crop_left position.
// For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
@ -613,11 +344,11 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
if (dec->mt_method_ > 0) {
ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
if (dec->use_threads_) {
ok = WebPWorkerSync(&dec->worker_);
}
if (io->teardown != NULL) {
if (io->teardown) {
io->teardown(io);
}
return ok;
@ -653,9 +384,9 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
// Initialize multi/single-thread worker
static int InitThreadContext(VP8Decoder* const dec) {
dec->cache_id_ = 0;
if (dec->mt_method_ > 0) {
if (dec->use_threads_) {
WebPWorker* const worker = &dec->worker_;
if (!WebPGetWorkerInterface()->Reset(worker)) {
if (!WebPWorkerReset(worker)) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"thread initialization failed.");
}
@ -670,28 +401,6 @@ static int InitThreadContext(VP8Decoder* const dec) {
return 1;
}
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height) {
if (options == NULL || options->use_threads == 0) {
return 0;
}
(void)headers;
(void)width;
(void)height;
assert(headers == NULL || !headers->is_lossless);
#if defined(WEBP_USE_THREAD)
if (width < MIN_WIDTH_FOR_THREADS) return 0;
// TODO(skal): tune the heuristic further
#if 0
if (height < 2 * width) return 2;
#endif
return 2;
#else // !WEBP_USE_THREAD
return 0;
#endif
}
#undef MT_CACHE_LINES
#undef ST_CACHE_LINES
@ -703,15 +412,14 @@ static int AllocateMemory(VP8Decoder* const dec) {
const int mb_w = dec->mb_w_;
// Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
const size_t top_size = sizeof(VP8TopSamples) * mb_w;
const size_t top_size = (16 + 8 + 8) * mb_w;
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
const size_t f_info_size =
(dec->filter_type_ > 0) ?
mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
: 0;
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
const size_t mb_data_size =
(dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
const size_t cache_height = (16 * num_caches
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
const size_t cache_size = top_size * cache_height;
@ -720,13 +428,13 @@ static int AllocateMemory(VP8Decoder* const dec) {
(uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
const uint64_t needed = (uint64_t)intra_pred_mode_size
+ top_size + mb_info_size + f_info_size
+ yuv_size + mb_data_size
+ cache_size + alpha_size + WEBP_ALIGN_CST;
+ yuv_size + coeffs_size
+ cache_size + alpha_size + ALIGN_MASK;
uint8_t* mem;
if (needed != (size_t)needed) return 0; // check for overflow
if (needed > dec->mem_size_) {
WebPSafeFree(dec->mem_);
free(dec->mem_);
dec->mem_size_ = 0;
dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
if (dec->mem_ == NULL) {
@ -741,8 +449,12 @@ static int AllocateMemory(VP8Decoder* const dec) {
dec->intra_t_ = (uint8_t*)mem;
mem += intra_pred_mode_size;
dec->yuv_t_ = (VP8TopSamples*)mem;
mem += top_size;
dec->y_t_ = (uint8_t*)mem;
mem += 16 * mb_w;
dec->u_t_ = (uint8_t*)mem;
mem += 8 * mb_w;
dec->v_t_ = (uint8_t*)mem;
mem += 8 * mb_w;
dec->mb_info_ = ((VP8MB*)mem) + 1;
mem += mb_info_size;
@ -751,24 +463,20 @@ static int AllocateMemory(VP8Decoder* const dec) {
mem += f_info_size;
dec->thread_ctx_.id_ = 0;
dec->thread_ctx_.f_info_ = dec->f_info_;
if (dec->mt_method_ > 0) {
if (dec->use_threads_) {
// secondary cache line. The deblocking process needs to make use of the
// filtering strength from previous macroblock row, while the new ones
// are being decoded in parallel. We'll just swap the pointers.
dec->thread_ctx_.f_info_ += mb_w;
}
mem = (uint8_t*)WEBP_ALIGN(mem);
assert((yuv_size & WEBP_ALIGN_CST) == 0);
mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
assert((yuv_size & ALIGN_MASK) == 0);
dec->yuv_b_ = (uint8_t*)mem;
mem += yuv_size;
dec->mb_data_ = (VP8MBData*)mem;
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
if (dec->mt_method_ == 2) {
dec->thread_ctx_.mb_data_ += mb_w;
}
mem += mb_data_size;
dec->coeffs_ = (int16_t*)mem;
mem += coeffs_size;
dec->cache_y_stride_ = 16 * mb_w;
dec->cache_uv_stride_ = 8 * mb_w;
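// Illustrative sketch (not libwebp code): the align-up idiom used for yuv_b_
// above. With a 32-byte alignment, rounding a pointer up is (p + 31) & ~31,
// which is what ALIGN_MASK (and presumably WEBP_ALIGN) computes.
#include <stdint.h>
static uint8_t* AlignUp32(uint8_t* ptr) {
  const uintptr_t mask = 32 - 1;
  return (uint8_t*)(((uintptr_t)ptr + mask) & ~mask);
}
// e.g. an address ending in 0x01 advances by 31 bytes; an already 32-byte
// aligned address is returned unchanged.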
@ -790,9 +498,8 @@ static int AllocateMemory(VP8Decoder* const dec) {
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
// note: left/top-info is initialized once for all.
// note: left-info is initialized once for all.
memset(dec->mb_info_ - 1, 0, mb_info_size);
VP8InitScanline(dec); // initialize left too.
// initialize top
memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
@ -811,7 +518,7 @@ static void InitIo(VP8Decoder* const dec, VP8Io* io) {
io->a = NULL;
}
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
if (!AllocateMemory(dec)) return 0;
InitIo(dec, io);
@ -820,3 +527,168 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
}
//------------------------------------------------------------------------------
// Main reconstruction function.
static const int kScan[16] = {
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {
if (mode == B_DC_PRED) {
if (dec->mb_x_ == 0) {
return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
} else {
return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
}
}
return mode;
}
static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
*(uint32_t*)dst = *(uint32_t*)src;
}
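// Note, not libwebp code: the other version of Copy32b() in this diff (earlier
// in frame.c) uses memcpy(dst, src, 4) instead of the pointer cast above. A
// 4-byte memcpy is well defined for any alignment and aliasing, and compilers
// typically lower it to a single 32-bit load/store:
#include <string.h>
#include <stdint.h>
static void Copy32bSafe(uint8_t* const dst, const uint8_t* const src) {
  memcpy(dst, src, 4);   // same effect as *(uint32_t*)dst = *(uint32_t*)src,
                         // but valid for any alignment
}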
void VP8ReconstructBlock(VP8Decoder* const dec) {
int j;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
// Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter.
if (dec->mb_x_ > 0) {
for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
}
for (j = -1; j < 8; ++j) {
Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
}
} else {
for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129;
}
for (j = 0; j < 8; ++j) {
u_dst[j * BPS - 1] = 129;
v_dst[j * BPS - 1] = 129;
}
// Init top-left sample on left column too
if (dec->mb_y_ > 0) {
y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
}
}
{
// bring top samples into the cache
uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
const int16_t* coeffs = dec->coeffs_;
int n;
if (dec->mb_y_ > 0) {
memcpy(y_dst - BPS, top_y, 16);
memcpy(u_dst - BPS, top_u, 8);
memcpy(v_dst - BPS, top_v, 8);
} else if (dec->mb_x_ == 0) {
// we only need to do this init once at block (0,0).
// Afterward, it remains valid for the whole topmost row.
memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
memset(u_dst - BPS - 1, 127, 8 + 1);
memset(v_dst - BPS - 1, 127, 8 + 1);
}
// predict and add residuals
if (dec->is_i4x4_) { // 4x4
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
if (dec->mb_y_ > 0) {
if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border
top_right[0] = top_y[15] * 0x01010101u;
} else {
memcpy(top_right, top_y + 16, sizeof(*top_right));
}
}
// replicate the top-right pixels below
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
// predict and add residues for all 4x4 blocks in turn.
for (n = 0; n < 16; n++) {
uint8_t* const dst = y_dst + kScan[n];
VP8PredLuma4[dec->imodes_[n]](dst);
if (dec->non_zero_ac_ & (1 << n)) {
VP8Transform(coeffs + n * 16, dst, 0);
} else if (dec->non_zero_ & (1 << n)) { // only DC is present
VP8TransformDC(coeffs + n * 16, dst);
}
}
} else { // 16x16
const int pred_func = CheckMode(dec, dec->imodes_[0]);
VP8PredLuma16[pred_func](y_dst);
if (dec->non_zero_) {
for (n = 0; n < 16; n++) {
uint8_t* const dst = y_dst + kScan[n];
if (dec->non_zero_ac_ & (1 << n)) {
VP8Transform(coeffs + n * 16, dst, 0);
} else if (dec->non_zero_ & (1 << n)) { // only DC is present
VP8TransformDC(coeffs + n * 16, dst);
}
}
}
}
{
// Chroma
const int pred_func = CheckMode(dec, dec->uvmode_);
VP8PredChroma8[pred_func](u_dst);
VP8PredChroma8[pred_func](v_dst);
if (dec->non_zero_ & 0x0f0000) { // chroma-U
const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
if (dec->non_zero_ac_ & 0x0f0000) {
VP8TransformUV(u_coeffs, u_dst);
} else {
VP8TransformDCUV(u_coeffs, u_dst);
}
}
if (dec->non_zero_ & 0xf00000) { // chroma-V
const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
if (dec->non_zero_ac_ & 0xf00000) {
VP8TransformUV(v_coeffs, v_dst);
} else {
VP8TransformDCUV(v_coeffs, v_dst);
}
}
// stash away top samples for next block
if (dec->mb_y_ < dec->mb_h_ - 1) {
memcpy(top_y, y_dst + 15 * BPS, 16);
memcpy(top_u, u_dst + 7 * BPS, 8);
memcpy(top_v, v_dst + 7 * BPS, 8);
}
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
}
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -15,11 +15,14 @@
#include <string.h>
#include <stdlib.h>
#include "./alphai.h"
#include "./webpi.h"
#include "./vp8i.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
// In append mode, buffer allocations increase as multiples of this value.
// Needs to be a power of 2.
#define CHUNK_SIZE 4096
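// Illustrative sketch (not libwebp code): one reason CHUNK_SIZE is required to
// be a power of two -- rounding a request up to the next multiple can then be
// done with a mask instead of a division. The helper below is made up.
#include <stddef.h>
static size_t RoundUpToChunk(size_t needed) {
  return (needed + CHUNK_SIZE - 1) & ~(size_t)(CHUNK_SIZE - 1);
}
// e.g. RoundUpToChunk(5000) == 8192, RoundUpToChunk(4096) == 4096.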
@ -28,13 +31,11 @@
//------------------------------------------------------------------------------
// Data structures for memory and states
// Decoding states. State normally flows as:
// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and
// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image.
// Decoding states. State normally flows like HEADER->PARTS0->DATA->DONE.
// If there is any error the decoder goes into state ERROR.
typedef enum {
STATE_WEBP_HEADER, // All the data before that of the VP8/VP8L chunk.
STATE_VP8_HEADER, // The VP8 Frame header (within the VP8 chunk).
STATE_PRE_VP8, // All data before that of the first VP8 chunk.
STATE_VP8_FRAME_HEADER, // For VP8 Frame header (within VP8 chunk).
STATE_VP8_PARTS0,
STATE_VP8_DATA,
STATE_VP8L_HEADER,
@ -72,20 +73,28 @@ struct WebPIDecoder {
MemBuffer mem_; // input memory buffer.
WebPDecBuffer output_; // output buffer (when no external one is supplied)
size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header.
int last_mb_y_; // last row reached for intra-mode decoding
};
// MB context to restore in case VP8DecodeMB() fails
typedef struct {
VP8MB left_;
VP8MB info_;
uint8_t intra_t_[4];
uint8_t intra_l_[4];
VP8BitReader br_;
VP8BitReader token_br_;
} MBContext;
//------------------------------------------------------------------------------
// MemBuffer: incoming data handling
static void RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
if (br->buf_ != NULL) {
br->buf_ += offset;
br->buf_end_ += offset;
}
}
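// Illustrative sketch (not libwebp code): the 'offset' passed to
// RemapBitReader() above is the signed distance the backing buffer moved when
// it was reallocated; every reader pointing into it is shifted by that amount.
// 'Reader' is a made-up stand-in for VP8BitReader.
#include <stddef.h>
#include <stdint.h>
typedef struct { const uint8_t* buf; const uint8_t* buf_end; } Reader;
static void RemapReader(Reader* const r, ptrdiff_t offset) {
  if (r->buf != NULL) {
    r->buf += offset;          // same pattern as RemapBitReader() above
    r->buf_end += offset;
  }
}
static void OnBufferMoved(Reader* const r,
                          const uint8_t* old_base, const uint8_t* new_base) {
  RemapReader(r, new_base - old_base);   // how the offset argument is derived
}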
static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
return (mem->end_ - mem->start_);
}
@ -93,7 +102,7 @@ static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
// Check if we need to preserve the compressed alpha data, as it may not have
// been decoded yet.
static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
if (idec->state_ == STATE_WEBP_HEADER) {
if (idec->state_ == STATE_PRE_VP8) {
// We haven't parsed the headers yet, so we don't know whether the image is
// lossy or lossless. This also means that we haven't parsed the ALPH chunk.
return 0;
@ -102,7 +111,7 @@ static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
return 0; // ALPH chunk is not present for lossless images.
} else {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
assert(dec != NULL); // Must be true as idec->state_ != STATE_WEBP_HEADER.
assert(dec != NULL); // Must be true as idec->state_ != STATE_PRE_VP8.
return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
}
}
@ -122,36 +131,17 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
if (offset != 0) {
int p;
for (p = 0; p <= last_part; ++p) {
VP8RemapBitReader(dec->parts_ + p, offset);
RemapBitReader(dec->parts_ + p, offset);
}
// Remap partition #0 data pointer to new offset, but only in MAP
// mode (in APPEND mode, partition #0 is copied into a fixed memory).
if (mem->mode_ == MEM_MODE_MAP) {
VP8RemapBitReader(&dec->br_, offset);
}
}
{
const uint8_t* const last_start = dec->parts_[last_part].buf_;
assert(last_part >= 0);
VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
mem->buf_ + mem->end_ - last_start);
}
if (NeedCompressedAlpha(idec)) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
dec->alpha_data_ += offset;
if (alph_dec != NULL) {
if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
assert(alph_vp8l_dec != NULL);
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
dec->alpha_data_ + ALPHA_HEADER_LEN,
dec->alpha_data_size_ - ALPHA_HEADER_LEN);
} else { // alph_dec->method_ == ALPHA_NO_COMPRESSION
// Nothing special to do in this case.
}
RemapBitReader(&dec->br_, offset);
}
}
assert(last_part >= 0);
dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
if (NeedCompressedAlpha(idec)) dec->alpha_data_ += offset;
} else { // Resize lossless bitreader
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
@ -185,7 +175,7 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
(uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
if (new_buf == NULL) return 0;
memcpy(new_buf, old_base, current_size);
WebPSafeFree(mem->buf_);
free(mem->buf_);
mem->buf_ = new_buf;
mem->buf_size_ = (size_t)extra_size;
mem->start_ = new_mem_start;
@ -227,8 +217,8 @@ static void InitMemBuffer(MemBuffer* const mem) {
static void ClearMemBuffer(MemBuffer* const mem) {
assert(mem);
if (mem->mode_ == MEM_MODE_APPEND) {
WebPSafeFree(mem->buf_);
WebPSafeFree((void*)mem->part0_buf_);
free(mem->buf_);
free((void*)mem->part0_buf_);
}
}
@ -242,34 +232,35 @@ static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
return 1;
}
// To be called last.
static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
const WebPDecoderOptions* const options = idec->params_.options;
WebPDecBuffer* const output = idec->params_.output;
idec->state_ = STATE_DONE;
if (options != NULL && options->flip) {
return WebPFlipBuffer(output);
} else {
return VP8_STATUS_OK;
}
}
//------------------------------------------------------------------------------
// Macroblock-decoding contexts
static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
MBContext* const context) {
context->left_ = dec->mb_info_[-1];
context->info_ = dec->mb_info_[dec->mb_x_];
const VP8BitReader* const br = &dec->br_;
const VP8MB* const left = dec->mb_info_ - 1;
const VP8MB* const info = dec->mb_info_ + dec->mb_x_;
context->left_ = *left;
context->info_ = *info;
context->br_ = *br;
context->token_br_ = *token_br;
memcpy(context->intra_t_, dec->intra_t_ + 4 * dec->mb_x_, 4);
memcpy(context->intra_l_, dec->intra_l_, 4);
}
static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
VP8BitReader* const token_br) {
dec->mb_info_[-1] = context->left_;
dec->mb_info_[dec->mb_x_] = context->info_;
VP8BitReader* const br = &dec->br_;
VP8MB* const left = dec->mb_info_ - 1;
VP8MB* const info = dec->mb_info_ + dec->mb_x_;
*left = context->left_;
*info = context->info_;
*br = context->br_;
*token_br = context->token_br_;
memcpy(dec->intra_t_ + 4 * dec->mb_x_, context->intra_t_, 4);
memcpy(dec->intra_l_, context->intra_l_, 4);
}
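// Illustrative sketch (not libwebp code): the save / try / restore shape that
// SaveContext()/RestoreContext() implement. If the incremental step fails for
// lack of data, the snapshot is copied back and the caller reports SUSPENDED so
// the same macroblock is retried later. Names are made up.
typedef struct { int left, info, br_state; } Snapshot;
static int TryStepWithRollback(Snapshot* const live,
                               int (*try_step)(Snapshot*)) {
  const Snapshot saved = *live;    // SaveContext()
  if (!try_step(live)) {
    *live = saved;                 // RestoreContext(): undo partial progress
    return 0;                      // caller returns VP8_STATUS_SUSPENDED
  }
  return 1;
}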
//------------------------------------------------------------------------------
@ -277,7 +268,7 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
if (idec->state_ == STATE_VP8_DATA) {
VP8Io* const io = &idec->io_;
if (io->teardown != NULL) {
if (io->teardown) {
io->teardown(io);
}
}
@ -305,7 +296,6 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
headers.data = data;
headers.data_size = curr_size;
headers.have_all_data = 0;
status = WebPParseHeaders(&headers);
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet.
@ -321,9 +311,15 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
return VP8_STATUS_OUT_OF_MEMORY;
}
idec->dec_ = dec;
#ifdef WEBP_USE_THREAD
dec->use_threads_ = (idec->params_.options != NULL) &&
(idec->params_.options->use_threads > 0);
#else
dec->use_threads_ = 0;
#endif
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
ChangeState(idec, STATE_VP8_HEADER, headers.offset);
ChangeState(idec, STATE_VP8_FRAME_HEADER, headers.offset);
} else {
VP8LDecoder* const dec = VP8LNew();
if (dec == NULL) {
@ -338,14 +334,13 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
const size_t curr_size = MemDataSize(&idec->mem_);
int width, height;
uint32_t bits;
if (curr_size < VP8_FRAME_HEADER_SIZE) {
// Not enough data bytes to extract VP8 Frame Header.
return VP8_STATUS_SUSPENDED;
}
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
@ -359,32 +354,30 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
}
// Partition #0
static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
static int CopyParts0Data(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8BitReader* const br = &dec->br_;
const size_t part_size = br->buf_end_ - br->buf_;
const size_t psize = br->buf_end_ - br->buf_;
MemBuffer* const mem = &idec->mem_;
assert(!idec->is_lossless_);
assert(mem->part0_buf_ == NULL);
// the following is a format limitation, no need for runtime check:
assert(part_size <= mem->part0_size_);
if (part_size == 0) { // can't have zero-size partition #0
return VP8_STATUS_BITSTREAM_ERROR;
}
assert(psize > 0);
assert(psize <= mem->part0_size_); // Format limit: no need for runtime check
if (mem->mode_ == MEM_MODE_APPEND) {
// We copy and grab ownership of the partition #0 data.
uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
uint8_t* const part0_buf = (uint8_t*)malloc(psize);
if (part0_buf == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
return 0;
}
memcpy(part0_buf, br->buf_, part_size);
memcpy(part0_buf, br->buf_, psize);
mem->part0_buf_ = part0_buf;
VP8BitReaderSetBuffer(br, part0_buf, part_size);
br->buf_ = part0_buf;
br->buf_end_ = part0_buf + psize;
} else {
// Else: just keep pointers to the partition #0's data in dec_->br_.
}
mem->start_ += part_size;
return VP8_STATUS_OK;
mem->start_ += psize;
return 1;
}
static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
@ -414,14 +407,9 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
// This change must be done before calling VP8InitFrame()
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
VP8InitDithering(params->options, dec);
dec->status_ = CopyParts0Data(idec);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
if (!CopyParts0Data(idec)) {
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
}
// Finish setting up the decoding parameters. Will call io->setup().
@ -445,52 +433,49 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
VP8Io* const io = &idec->io_;
assert(dec->ready_);
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
if (idec->last_mb_y_ != dec->mb_y_) {
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
// note: normally, error shouldn't occur since we already have the whole
// partition0 available here in DecodeRemaining(). Reaching EOF while
// reading intra modes really means a BITSTREAM_ERROR.
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
idec->last_mb_y_ = dec->mb_y_;
VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
if (dec->mb_x_ == 0) {
VP8InitScanline(dec);
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
for (; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) {
MBContext context;
SaveContext(dec, token_br, &context);
if (!VP8DecodeMB(dec, token_br)) {
RestoreContext(&context, dec, token_br);
// We shouldn't fail when MAX_MB data was available
if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
RestoreContext(&context, dec, token_br);
return VP8_STATUS_SUSPENDED;
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
// Release buffer only if there is only one partition
if (dec->num_parts_ == 1) {
idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
assert(idec->mem_.start_ <= idec->mem_.end_);
}
}
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->mb_x_ = 0;
}
// Synchronize the thread and check for errors.
if (!VP8ExitCritical(dec, io)) {
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->ready_ = 0;
return FinishDecoding(idec);
idec->state_ = STATE_DONE;
return VP8_STATUS_OK;
}
static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
VP8StatusCode status) {
static int ErrorStatusLossless(WebPIDecoder* const idec, VP8StatusCode status) {
if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_SUSPENDED;
}
@ -507,15 +492,9 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
// Wait until there's enough data for decoding header.
if (curr_size < (idec->chunk_size_ >> 3)) {
dec->status_ = VP8_STATUS_SUSPENDED;
return ErrorStatusLossless(idec, dec->status_);
return VP8_STATUS_SUSPENDED;
}
if (!VP8LDecodeHeader(dec, io)) {
if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR &&
curr_size < idec->chunk_size_) {
dec->status_ = VP8_STATUS_SUSPENDED;
}
return ErrorStatusLossless(idec, dec->status_);
}
// Allocate/verify output buffer now.
@ -534,29 +513,33 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
const size_t curr_size = MemDataSize(&idec->mem_);
assert(idec->is_lossless_);
// Switch to incremental decoding if we don't have all the bytes available.
dec->incremental_ = (curr_size < idec->chunk_size_);
// At present Lossless decoder can't decode image incrementally. So wait till
// all the image data is aggregated before image can be decoded.
if (curr_size < idec->chunk_size_) {
return VP8_STATUS_SUSPENDED;
}
if (!VP8LDecodeImage(dec)) {
return ErrorStatusLossless(idec, dec->status_);
}
assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_
: FinishDecoding(idec);
idec->state_ = STATE_DONE;
return VP8_STATUS_OK;
}
// Main decoding loop
static VP8StatusCode IDecode(WebPIDecoder* idec) {
VP8StatusCode status = VP8_STATUS_SUSPENDED;
if (idec->state_ == STATE_WEBP_HEADER) {
if (idec->state_ == STATE_PRE_VP8) {
status = DecodeWebPHeaders(idec);
} else {
if (idec->dec_ == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
}
if (idec->state_ == STATE_VP8_HEADER) {
if (idec->state_ == STATE_VP8_FRAME_HEADER) {
status = DecodeVP8FrameHeader(idec);
}
if (idec->state_ == STATE_VP8_PARTS0) {
@ -578,23 +561,20 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
// Public functions
WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec));
if (idec == NULL) {
return NULL;
}
idec->state_ = STATE_WEBP_HEADER;
idec->state_ = STATE_PRE_VP8;
idec->chunk_size_ = 0;
idec->last_mb_y_ = -1;
InitMemBuffer(&idec->mem_);
WebPInitDecBuffer(&idec->output_);
VP8InitIo(&idec->io_);
WebPResetDecParams(&idec->params_);
idec->params_.output = (output_buffer != NULL) ? output_buffer
: &idec->output_;
idec->params_.output = output_buffer ? output_buffer : &idec->output_;
WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions.
return idec;
@ -628,16 +608,16 @@ void WebPIDelete(WebPIDecoder* idec) {
if (!idec->is_lossless_) {
if (idec->state_ == STATE_VP8_DATA) {
// Synchronize the thread, clean-up and check for errors.
VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
VP8ExitCritical(idec->dec_, &idec->io_);
}
VP8Delete((VP8Decoder*)idec->dec_);
VP8Delete(idec->dec_);
} else {
VP8LDelete((VP8LDecoder*)idec->dec_);
VP8LDelete(idec->dec_);
}
}
ClearMemBuffer(&idec->mem_);
WebPFreeDecBuffer(&idec->output_);
WebPSafeFree(idec);
free(idec);
}
//------------------------------------------------------------------------------
@ -792,6 +772,7 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (left != NULL) *left = 0;
if (top != NULL) *top = 0;
// TODO(skal): later include handling of rotations.
if (src) {
if (width != NULL) *width = src->width;
if (height != NULL) *height = idec->params_.last_y;
@ -846,7 +827,7 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
void* user_data) {
if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
if (idec == NULL || idec->state_ > STATE_PRE_VP8) {
return 0;
}
@ -857,3 +838,7 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
return 1;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -17,7 +17,10 @@
#include "./webpi.h"
#include "../dsp/dsp.h"
#include "../dsp/yuv.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
@ -45,16 +48,56 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
// Point-sampling U/V sampler.
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* const output = p->output;
WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* const dst = buf->rgba + io->mb_y * buf->stride;
WebPSamplerProcessPlane(io->y, io->y_stride,
io->u, io->v, io->uv_stride,
dst, buf->stride, io->mb_w, io->mb_h,
WebPSamplers[output->colorspace]);
WebPDecBuffer* output = p->output;
const WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
const uint8_t* y_src = io->y;
const uint8_t* u_src = io->u;
const uint8_t* v_src = io->v;
const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace];
const int mb_w = io->mb_w;
const int last = io->mb_h - 1;
int j;
for (j = 0; j < last; j += 2) {
sample(y_src, y_src + io->y_stride, u_src, v_src,
dst, dst + buf->stride, mb_w);
y_src += 2 * io->y_stride;
u_src += io->uv_stride;
v_src += io->uv_stride;
dst += 2 * buf->stride;
}
if (j == last) { // Just do the last line twice
sample(y_src, y_src, u_src, v_src, dst, dst, mb_w);
}
return io->mb_h;
}
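// Illustrative sketch (not libwebp code): the 4:2:0 row pairing used by
// EmitSampledRGB() above -- two luma rows share one chroma row, and an odd
// trailing row is emitted by passing the same luma row twice. The callback
// type is a made-up stand-in for WebPSampleLinePairFunc.
#include <stdint.h>
typedef void (*SamplePairFunc)(const uint8_t* y0, const uint8_t* y1,
                               const uint8_t* u, const uint8_t* v,
                               uint8_t* dst0, uint8_t* dst1, int width);
static void EmitRows420(const uint8_t* y, int y_stride,
                        const uint8_t* u, const uint8_t* v, int uv_stride,
                        uint8_t* dst, int dst_stride,
                        int width, int height, SamplePairFunc sample) {
  int j;
  for (j = 0; j + 1 < height; j += 2) {      // full row pairs
    sample(y, y + y_stride, u, v, dst, dst + dst_stride, width);
    y += 2 * y_stride;
    u += uv_stride;
    v += uv_stride;
    dst += 2 * dst_stride;
  }
  if (j < height) {                          // odd last row: duplicate it
    sample(y, y, u, v, dst, dst, width);
  }
}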
//------------------------------------------------------------------------------
// YUV444 -> RGB conversion
#if 0 // TODO(skal): this is for future rescaling.
static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* output = p->output;
const WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
const uint8_t* y_src = io->y;
const uint8_t* u_src = io->u;
const uint8_t* v_src = io->v;
const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace];
const int mb_w = io->mb_w;
const int last = io->mb_h;
int j;
for (j = 0; j < last; ++j) {
convert(y_src, u_src, v_src, dst, mb_w);
y_src += io->y_stride;
u_src += io->uv_stride;
v_src += io->uv_stride;
dst += buf->stride;
}
return io->mb_h;
}
#endif
//------------------------------------------------------------------------------
// Fancy upsampling
@ -76,7 +119,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
if (y == 0) {
// First line is special cased. We mirror the u/v samples at boundary.
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w);
} else {
// We can finish the left-over line from previous call.
upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
@ -119,16 +162,14 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
//------------------------------------------------------------------------------
static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
const uint8_t* alpha = io->a;
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
int j;
(void)expected_num_lines_out;
assert(expected_num_lines_out == mb_h);
if (alpha != NULL) {
for (j = 0; j < mb_h; ++j) {
memcpy(dst, alpha, mb_w * sizeof(*dst));
@ -171,8 +212,7 @@ static int GetAlphaSourceRow(const VP8Io* const io,
return start_y;
}
static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
const uint8_t* alpha = io->a;
if (alpha != NULL) {
const int mb_w = io->mb_w;
@ -183,13 +223,21 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int num_rows;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
num_rows, dst, buf->stride);
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_rows);
// has_alpha is true if there's non-trivial alpha to premultiply with.
if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
uint32_t alpha_mask = 0xff;
int i, j;
for (j = 0; j < num_rows; ++j) {
for (i = 0; i < mb_w; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_mask &= alpha_value;
}
alpha += io->width;
dst += buf->stride;
}
// alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
mb_w, num_rows, buf->stride);
}
@ -197,8 +245,7 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
return 0;
}
static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
const uint8_t* alpha = io->a;
if (alpha != NULL) {
const int mb_w = io->mb_w;
@ -207,13 +254,10 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
int num_rows;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
#ifdef WEBP_SWAP_16BIT_CSP
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* alpha_dst = base_rgba + 1;
#endif
uint32_t alpha_mask = 0x0f;
int i, j;
for (j = 0; j < num_rows; ++j) {
for (i = 0; i < mb_w; ++i) {
// Fill in the alpha value (converted to 4 bits).
@ -224,8 +268,6 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
alpha += io->width;
alpha_dst += buf->stride;
}
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_rows);
if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
}
@ -251,35 +293,15 @@ static int Rescale(const uint8_t* src, int src_stride,
static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
const int mb_h = io->mb_h;
const int uv_mb_h = (mb_h + 1) >> 1;
WebPRescaler* const scaler = &p->scaler_y;
int num_lines_out = 0;
if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
// Before rescaling, we premultiply the luma directly into the io->y
// internal buffer. This is OK since these samples are not used for
// intra-prediction (the top samples are saved in cache_y_/u_/v_).
// But we need to cast the const away, though.
WebPMultRows((uint8_t*)io->y, io->y_stride,
io->a, io->width, io->mb_w, mb_h, 0);
}
num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y);
Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);
Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
return num_lines_out;
}
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
if (io->a != NULL) {
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
uint8_t* dst_y = buf->y + p->last_y * buf->y_stride;
const uint8_t* src_a = buf->a + p->last_y * buf->a_stride;
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_lines_out);
if (num_lines_out > 0) { // unmultiply the Y
WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride,
p->scaler_a.dst_width, num_lines_out, 1);
}
Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
}
return 0;
}
@ -296,34 +318,39 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
const size_t work_size = 2 * out_width; // scratch memory for luma rescaler
const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
size_t tmp_size;
rescaler_t* work;
int32_t* work;
tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
tmp_size = work_size + 2 * uv_work_size;
if (has_alpha) {
tmp_size += work_size * sizeof(*work);
tmp_size += work_size;
}
p->memory = WebPSafeMalloc(1ULL, tmp_size);
p->memory = calloc(1, tmp_size * sizeof(*work));
if (p->memory == NULL) {
return 0; // memory error
}
work = (rescaler_t*)p->memory;
work = (int32_t*)p->memory;
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
buf->y, out_width, out_height, buf->y_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size + uv_work_size);
p->emit = EmitRescaledYUV;
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
buf->a, out_width, out_height, buf->a_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + work_size + 2 * uv_work_size);
p->emit_alpha = EmitRescaledAlphaYUV;
WebPInitAlphaProcessing();
}
return 1;
}
@ -335,13 +362,13 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
const WebPYUV444Converter convert =
WebPYUV444Converters[p->output->colorspace];
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + y_pos * buf->stride;
uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
int num_lines_out = 0;
// For RGB rescaling, because of the YUV420, current scan position
// U/V can be +1/-1 line from the Y one. Hence the double test.
while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
WebPRescalerHasPendingOutput(&p->scaler_u)) {
assert(y_pos + num_lines_out < p->output->height);
assert(p->last_y + y_pos + num_lines_out < p->output->height);
assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
WebPRescalerExportRow(&p->scaler_y);
WebPRescalerExportRow(&p->scaler_u);
@ -363,69 +390,65 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
const int y_lines_in =
WebPRescalerImport(&p->scaler_y, mb_h - j,
io->y + j * io->y_stride, io->y_stride);
const int u_lines_in =
WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
io->u + uv_j * io->uv_stride, io->uv_stride);
const int v_lines_in =
WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
io->v + uv_j * io->uv_stride, io->uv_stride);
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
j += y_lines_in;
if (WebPRescaleNeededLines(&p->scaler_u, uv_mb_h - uv_j)) {
const int u_lines_in =
WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
io->u + uv_j * io->uv_stride, io->uv_stride);
const int v_lines_in =
WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
io->v + uv_j * io->uv_stride, io->uv_stride);
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
uv_j += u_lines_in;
}
num_lines_out += ExportRGB(p, p->last_y + num_lines_out);
uv_j += u_lines_in;
num_lines_out += ExportRGB(p, num_lines_out);
}
return num_lines_out;
}
static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
static int ExportAlpha(WebPDecParams* const p, int y_pos) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int alpha_first =
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
int num_lines_out = 0;
const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
uint32_t non_opaque = 0;
uint32_t alpha_mask = 0xff;
const int width = p->scaler_a.dst_width;
while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
num_lines_out < max_lines_out) {
assert(y_pos + num_lines_out < p->output->height);
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a);
non_opaque |= WebPDispatchAlpha(p->scaler_a.dst, 0, width, 1, dst, 0);
for (i = 0; i < width; ++i) {
const uint32_t alpha_value = p->scaler_a.dst[i];
dst[4 * i] = alpha_value;
alpha_mask &= alpha_value;
}
dst += buf->stride;
++num_lines_out;
}
if (is_premult_alpha && non_opaque) {
if (is_premult_alpha && alpha_mask != 0xff) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
width, num_lines_out, buf->stride);
}
return num_lines_out;
}
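// Illustrative sketch (not libwebp code): the opacity test used above. ANDing
// every alpha value together leaves 0xff only if all pixels are fully opaque,
// in which case the premultiply pass can be skipped. The helper is made up.
#include <stdint.h>
static int HasNonTrivialAlpha(const uint8_t* alpha, int width, int height,
                              int stride) {
  uint32_t alpha_mask = 0xff;
  int i, j;
  for (j = 0; j < height; ++j) {
    for (i = 0; i < width; ++i) {
      alpha_mask &= alpha[i];
    }
    alpha += stride;
  }
  return alpha_mask != 0xff;   // true -> premultiplication is actually needed
}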
static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
int max_lines_out) {
static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
#ifdef WEBP_SWAP_16BIT_CSP
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
uint8_t* alpha_dst = base_rgba + 1;
#endif
int num_lines_out = 0;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int width = p->scaler_a.dst_width;
const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
uint32_t alpha_mask = 0x0f;
while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
num_lines_out < max_lines_out) {
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(y_pos + num_lines_out < p->output->height);
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a);
for (i = 0; i < width; ++i) {
// Fill in the alpha value (converted to 4 bits).
@ -442,17 +465,15 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
return num_lines_out;
}
static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int expected_num_out_lines) {
static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
if (io->a != NULL) {
WebPRescaler* const scaler = &p->scaler_a;
int lines_left = expected_num_out_lines;
const int y_end = p->last_y + lines_left;
while (lines_left > 0) {
const int row_offset = scaler->src_y - io->mb_y;
WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
io->a + row_offset * io->width, io->width);
lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);
int j = 0;
int pos = 0;
while (j < io->mb_h) {
j += WebPRescalerImport(scaler, io->mb_h - j,
io->a + j * io->width, io->width);
pos += p->emit_alpha_row(p, pos);
}
}
return 0;
@ -465,9 +486,9 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
const size_t work_size = 2 * out_width; // scratch memory for one rescaler
rescaler_t* work; // rescalers work area
int32_t* work; // rescalers work area
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
size_t tmp_size1, tmp_size2, total_size;
size_t tmp_size1, tmp_size2;
tmp_size1 = 3 * work_size;
tmp_size2 = 3 * out_width;
@ -475,28 +496,30 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
tmp_size1 += work_size;
tmp_size2 += out_width;
}
total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
p->memory = WebPSafeMalloc(1ULL, total_size);
p->memory = calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));
if (p->memory == NULL) {
return 0; // memory error
}
work = (rescaler_t*)p->memory;
work = (int32_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
tmp + 0 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 0 * work_size);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
tmp + 1 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 1 * work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
tmp + 2 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 2 * work_size);
p->emit = EmitRescaledRGB;
WebPInitYUV444Converters();
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
tmp + 3 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 3 * work_size);
p->emit_alpha = EmitRescaledAlphaRGB;
if (p->output->colorspace == MODE_RGBA_4444 ||
@ -505,7 +528,6 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
} else {
p->emit_alpha_row = ExportAlpha;
}
WebPInitAlphaProcessing();
}
return 1;
}
@ -526,9 +548,7 @@ static int CustomSetup(VP8Io* io) {
if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {
return 0;
}
if (is_alpha && WebPIsPremultipliedMode(colorspace)) {
WebPInitUpsamplers();
}
if (io->use_scaling) {
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
if (!ok) {
@ -536,12 +556,11 @@ static int CustomSetup(VP8Io* io) {
}
} else {
if (is_rgb) {
WebPInitSamplers();
p->emit = EmitSampledRGB; // default
if (io->fancy_upsampling) {
#ifdef FANCY_UPSAMPLING
if (io->fancy_upsampling) {
const int uv_width = (io->mb_w + 1) >> 1;
p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
p->memory = malloc(io->mb_w + 2 * uv_width);
if (p->memory == NULL) {
return 0; // memory error.
}
@ -550,20 +569,18 @@ static int CustomSetup(VP8Io* io) {
p->tmp_v = p->tmp_u + uv_width;
p->emit = EmitFancyRGB;
WebPInitUpsamplers();
#endif
}
#endif
} else {
p->emit = EmitYUV;
}
if (is_alpha) { // need transparency output
if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply();
p->emit_alpha =
(colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
EmitAlphaRGBA4444
: is_rgb ? EmitAlphaRGB
: EmitAlphaYUV;
if (is_rgb) {
WebPInitAlphaProcessing();
}
}
}
@ -586,8 +603,8 @@ static int CustomPut(const VP8Io* io) {
return 0;
}
num_lines_out = p->emit(io, p);
if (p->emit_alpha != NULL) {
p->emit_alpha(io, p, num_lines_out);
if (p->emit_alpha) {
p->emit_alpha(io, p);
}
p->last_y += num_lines_out;
return 1;
@ -597,7 +614,7 @@ static int CustomPut(const VP8Io* io) {
static void CustomTeardown(const VP8Io* io) {
WebPDecParams* const p = (WebPDecParams*)io->opaque;
WebPSafeFree(p->memory);
free(p->memory);
p->memory = NULL;
}
@ -612,3 +629,7 @@ void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
}
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
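
For reference, the ExportAlphaRGBA4444 loop above converts each 8-bit alpha sample to 4 bits and stores it in the low nibble of the pixel's second byte, while alpha_mask tracks whether the row stayed fully opaque. A minimal stand-alone sketch of that conversion (function and buffer names here are illustrative, assuming the usual MODE_RGBA_4444 layout where byte 1 of a pixel holds (B << 4) | A):

#include <stdint.h>
#include <stdio.h>

// Write 8-bit alpha values into the low nibble of 2-byte RGBA4444 pixels.
static uint32_t FillAlphaRow4444(const uint8_t* alpha, uint8_t* dst, int width) {
  uint32_t alpha_mask = 0x0f;   // stays 0x0f only if the row is fully opaque
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t alpha_value = alpha[i] >> 4;        // 8-bit -> 4-bit alpha
    dst[2 * i + 1] = (dst[2 * i + 1] & 0xf0) | alpha_value;
    alpha_mask &= alpha_value;
  }
  return alpha_mask;
}

int main(void) {
  const uint8_t alpha[4] = { 0xff, 0x80, 0xff, 0xff };
  uint8_t row[8] = { 0 };
  printf("mask=0x%x\n", FillAlphaRow4444(alpha, row, 4));  // mask=0x8: not opaque
  return 0;
}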

37
src/dec/layer.c Normal file
View File

@ -0,0 +1,37 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Enhancement layer (for YUV444/422)
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include "./vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
int VP8DecodeLayer(VP8Decoder* const dec) {
assert(dec);
assert(dec->layer_data_size_ > 0);
(void)dec;
// TODO: handle enhancement layer here.
return 1;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -13,6 +13,10 @@
#include "./vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static WEBP_INLINE int clip(int v, int M) {
return v < 0 ? 0 : v > M ? M : v;
}
@ -100,11 +104,12 @@ void VP8ParseQuant(VP8Decoder* const dec) {
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
}
}
}
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
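
The quantizer parsing above clamps each per-segment index with clip(v, M) before looking it up in the DC/AC tables, so out-of-range indices simply hit the table bounds. A tiny stand-alone illustration (the five-entry table below is made up; the real kDcTable/kAcTable are the spec's full-length tables):

#include <stdio.h>

static int clip(int v, int M) {   // same shape as the helper in quant.c
  return v < 0 ? 0 : v > M ? M : v;
}

int main(void) {
  static const int table[5] = { 4, 5, 6, 7, 8 };  // hypothetical miniature table
  printf("%d %d %d\n", table[clip(-3, 4)], table[clip(2, 4)], table[clip(9, 4)]);
  // prints: 4 6 8 -- negative and oversized indices are pinned to the ends
  return 0;
}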

View File

@ -11,11 +11,14 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./vp8i.h"
#include "../utils/bit_reader_inl.h"
#include "vp8i.h"
#define USE_GENERIC_TREE
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#ifdef USE_GENERIC_TREE
static const int8_t kYModesIntra4[18] = {
-B_DC_PRED, 1,
@ -30,12 +33,61 @@ static const int8_t kYModesIntra4[18] = {
};
#endif
#ifndef ONLY_KEYFRAME_CODE
// inter prediction modes
enum {
LEFT4 = 0, ABOVE4 = 1, ZERO4 = 2, NEW4 = 3,
NEARESTMV, NEARMV, ZEROMV, NEWMV, SPLITMV };
static const int8_t kYModesInter[8] = {
-DC_PRED, 1,
2, 3,
-V_PRED, -H_PRED,
-TM_PRED, -B_PRED
};
static const int8_t kMBSplit[6] = {
-3, 1,
-2, 2,
-0, -1
};
static const int8_t kMVRef[8] = {
-ZEROMV, 1,
-NEARESTMV, 2,
-NEARMV, 3,
-NEWMV, -SPLITMV
};
static const int8_t kMVRef4[6] = {
-LEFT4, 1,
-ABOVE4, 2,
-ZERO4, -NEW4
};
#endif
//------------------------------------------------------------------------------
// Default probabilities
// Inter
#ifndef ONLY_KEYFRAME_CODE
static const uint8_t kYModeProbaInter0[4] = { 112, 86, 140, 37 };
static const uint8_t kUVModeProbaInter0[3] = { 162, 101, 204 };
static const uint8_t kMVProba0[2][NUM_MV_PROBAS] = {
{ 162, 128, 225, 146, 172, 147, 214, 39,
156, 128, 129, 132, 75, 145, 178, 206,
239, 254, 254 },
{ 164, 128, 204, 170, 119, 235, 140, 230,
228, 128, 130, 130, 74, 148, 180, 203,
236, 254, 254 }
};
#endif
// Paragraph 13.5
static const uint8_t
CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
// generated using vp8_default_coef_probs() in entropy.c:129
{ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
@ -276,38 +328,28 @@ static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
void VP8ResetProba(VP8Proba* const proba) {
memset(proba->segments_, 255u, sizeof(proba->segments_));
// proba->bands_[][] is initialized later
memcpy(proba->coeffs_, CoeffsProba0, sizeof(CoeffsProba0));
#ifndef ONLY_KEYFRAME_CODE
memcpy(proba->mv_, kMVProba0, sizeof(kMVProba0));
memcpy(proba->ymode_, kYModeProbaInter0, sizeof(kYModeProbaInter0));
memcpy(proba->uvmode_, kUVModeProbaInter0, sizeof(kUVModeProbaInter0));
#endif
}
static void ParseIntraMode(VP8BitReader* const br,
VP8Decoder* const dec, int mb_x) {
uint8_t* const top = dec->intra_t_ + 4 * mb_x;
void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) {
uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;
uint8_t* const left = dec->intra_l_;
VP8MBData* const block = dec->mb_data_ + mb_x;
// Note: we don't save segment map (yet), as we don't expect
// to decode more than 1 keyframe.
if (dec->segment_hdr_.update_map_) {
// Hardcoded tree parsing
block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0])
? VP8GetBit(br, dec->proba_.segments_[1])
: 2 + VP8GetBit(br, dec->proba_.segments_[2]);
} else {
block->segment_ = 0; // default for intra
}
if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_);
block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
if (!block->is_i4x4_) {
// Hardcoded 16x16 intra-mode decision tree.
// Hardcoded 16x16 intra-mode decision tree.
dec->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
if (!dec->is_i4x4_) {
const int ymode =
VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
: (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
block->imodes_[0] = ymode;
memset(top, ymode, 4 * sizeof(*top));
memset(left, ymode, 4 * sizeof(*left));
dec->imodes_[0] = ymode;
memset(top, ymode, 4 * sizeof(top[0]));
memset(left, ymode, 4 * sizeof(left[0]));
} else {
uint8_t* modes = block->imodes_;
uint8_t* modes = dec->imodes_;
int y;
for (y = 0; y < 4; ++y) {
int ymode = left[y];
@ -316,10 +358,10 @@ static void ParseIntraMode(VP8BitReader* const br,
const uint8_t* const prob = kBModesProba[top[x]][ymode];
#ifdef USE_GENERIC_TREE
// Generic tree-parsing
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
while (i > 0) {
int i = 0;
do {
i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];
}
} while (i > 0);
ymode = -i;
#else
// Hardcoded tree parsing
@ -334,24 +376,15 @@ static void ParseIntraMode(VP8BitReader* const br,
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
#endif // USE_GENERIC_TREE
top[x] = ymode;
*modes++ = ymode;
}
memcpy(modes, top, 4 * sizeof(*top));
modes += 4;
left[y] = ymode;
}
}
// Hardcoded UVMode decision tree
block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
: !VP8GetBit(br, 114) ? V_PRED
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
}
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
int mb_x;
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
ParseIntraMode(br, dec, mb_x);
}
return !dec->br_.eof_;
dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
: !VP8GetBit(br, 114) ? V_PRED
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
}
//------------------------------------------------------------------------------
@ -493,13 +526,18 @@ static const uint8_t
}
};
// Paragraph 9.9
static const int kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
#ifndef ONLY_KEYFRAME_CODE
static const uint8_t MVUpdateProba[2][NUM_MV_PROBAS] = {
{ 237, 246, 253, 253, 254, 254, 254, 254,
254, 254, 254, 254, 254, 254, 250, 250,
252, 254, 254 },
{ 231, 243, 245, 253, 254, 254, 254, 254,
254, 254, 254, 254, 254, 254, 251, 251,
254, 254, 254 }
};
#endif
// Paragraph 9.9
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
VP8Proba* const proba = &dec->proba_;
int t, b, c, p;
@ -507,19 +545,47 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) {
const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ?
VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p];
proba->bands_[t][b].probas_[c][p] = v;
if (VP8GetBit(br, CoeffsUpdateProba[t][b][c][p])) {
proba->coeffs_[t][b][c][p] = VP8GetValue(br, 8);
}
}
}
}
for (b = 0; b < 16 + 1; ++b) {
proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
}
}
dec->use_skip_proba_ = VP8Get(br);
if (dec->use_skip_proba_) {
dec->skip_p_ = VP8GetValue(br, 8);
}
#ifndef ONLY_KEYFRAME_CODE
if (!dec->frm_hdr_.key_frame_) {
int i;
dec->intra_p_ = VP8GetValue(br, 8);
dec->last_p_ = VP8GetValue(br, 8);
dec->golden_p_ = VP8GetValue(br, 8);
if (VP8Get(br)) { // update y-mode
for (i = 0; i < 4; ++i) {
proba->ymode_[i] = VP8GetValue(br, 8);
}
}
if (VP8Get(br)) { // update uv-mode
for (i = 0; i < 3; ++i) {
proba->uvmode_[i] = VP8GetValue(br, 8);
}
}
// update MV
for (i = 0; i < 2; ++i) {
int k;
for (k = 0; k < NUM_MV_PROBAS; ++k) {
if (VP8GetBit(br, MVUpdateProba[i][k])) {
const int v = VP8GetValue(br, 7);
proba->mv_[i][k] = v ? v << 1 : 1;
}
}
}
}
#endif
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
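
Both variants of the intra-mode parser above walk a small binary tree stored as an int8_t array: a positive entry is the index of the next node pair, a negative entry is a (negated) leaf value, and each step consumes one bit. The following toy version keeps the same walking logic but replaces the probability-driven VP8GetBit() with a fixed bit sequence so it runs on its own (tree contents and values are made up):

#include <stdint.h>
#include <stdio.h>

// Toy tree: pair k lives at indices 2*k and 2*k+1; negative entries are leaves.
static const int8_t kToyTree[4] = {
  -10, 1,      // bit 0 -> leaf 10, bit 1 -> descend to pair 1
  -20, -30     // bit 0 -> leaf 20, bit 1 -> leaf 30
};

static int NextBit(const int* bits, int* pos) { return bits[(*pos)++]; }

static int ParseToken(const int* bits, int* pos) {
  int i = kToyTree[NextBit(bits, pos)];     // first step from the root pair
  while (i > 0) {
    i = kToyTree[2 * i + NextBit(bits, pos)];
  }
  return -i;                                // leaves are stored negated
}

int main(void) {
  const int bits[3] = { 1, 0,   // first token: right then left -> 20
                        0 };    // second token: left at the root -> 10
  int pos = 0;
  const int first = ParseToken(bits, &pos);
  const int second = ParseToken(bits, &pos);
  printf("%d %d\n", first, second);   // prints: 20 10
  return 0;
}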

View File

@ -13,12 +13,14 @@
#include <stdlib.h>
#include "./alphai.h"
#include "./vp8i.h"
#include "./vp8li.h"
#include "./webpi.h"
#include "../utils/bit_reader_inl.h"
#include "../utils/utils.h"
#include "../utils/bit_reader.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
@ -45,10 +47,10 @@ int VP8InitIoInternal(VP8Io* const io, int version) {
}
VP8Decoder* VP8New(void) {
VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(*dec));
if (dec != NULL) {
SetOk(dec);
WebPGetWorkerInterface()->Init(&dec->worker_);
WebPWorkerInit(&dec->worker_);
dec->ready_ = 0;
dec->num_parts_ = 1;
}
@ -69,13 +71,16 @@ const char* VP8StatusMessage(VP8Decoder* const dec) {
void VP8Delete(VP8Decoder* const dec) {
if (dec != NULL) {
VP8Clear(dec);
WebPSafeFree(dec);
free(dec);
}
}
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg) {
// The oldest error reported takes precedence over the new one.
// TODO This check would be unnecessary if alpha decompression was separated
// from VP8ProcessRow/FinishRow. This avoids setting 'dec->status_' to
// something other than VP8_STATUS_BITSTREAM_ERROR on alpha decompression
// failure.
if (dec->status_ == VP8_STATUS_OK) {
dec->status_ = error;
dec->error_msg_ = msg;
@ -118,9 +123,6 @@ int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
if (((bits >> 5)) >= chunk_size) { // partition_length
return 0; // inconsistent size information.
}
if (w == 0 || h == 0) {
return 0; // We don't support both width and height to be zero.
}
if (width) {
*width = w;
@ -190,27 +192,25 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
const uint8_t* sz = buf;
const uint8_t* buf_end = buf + size;
const uint8_t* part_start;
size_t size_left = size;
size_t last_part;
size_t p;
int last_part;
int p;
dec->num_parts_ = 1 << VP8GetValue(br, 2);
last_part = dec->num_parts_ - 1;
if (size < 3 * last_part) {
part_start = buf + last_part * 3;
if (buf_end < part_start) {
// we can't even read the sizes with sz[]! That's a failure.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
part_start = buf + last_part * 3;
size_left -= last_part * 3;
for (p = 0; p < last_part; ++p) {
size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
if (psize > size_left) psize = size_left;
VP8InitBitReader(dec->parts_ + p, part_start, psize);
part_start += psize;
size_left -= psize;
const uint32_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
const uint8_t* part_end = part_start + psize;
if (part_end > buf_end) part_end = buf_end;
VP8InitBitReader(dec->parts_ + p, part_start, part_end);
part_start = part_end;
sz += 3;
}
VP8InitBitReader(dec->parts_ + last_part, part_start, size_left);
VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end);
return (part_start < buf_end) ? VP8_STATUS_OK :
VP8_STATUS_SUSPENDED; // Init is ok, but there's not enough data
}
@ -249,6 +249,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
VP8PictureHeader* pic_hdr;
VP8BitReader* br;
VP8StatusCode status;
WebPHeaderStructure headers;
if (dec == NULL) {
return 0;
@ -258,8 +259,33 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
"null VP8Io passed to VP8GetHeaders()");
}
buf = io->data;
buf_size = io->data_size;
// Process Pre-VP8 chunks.
headers.data = io->data;
headers.data_size = io->data_size;
status = WebPParseHeaders(&headers);
if (status != VP8_STATUS_OK) {
return VP8SetError(dec, status, "Incorrect/incomplete header.");
}
if (headers.is_lossless) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Unexpected lossless format encountered.");
}
if (dec->alpha_data_ == NULL) {
assert(dec->alpha_data_size_ == 0);
// We have NOT set alpha data yet. Set it now.
// (This is to ensure that dec->alpha_data_ is NOT reset to NULL if
// WebPParseHeaders() is called more than once, as in incremental decoding
// case.)
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
}
// Process the VP8 frame header.
buf = headers.data + headers.offset;
buf_size = headers.data_size - headers.offset;
assert(headers.data_size >= headers.offset); // WebPParseHeaders' guarantee
if (buf_size < 4) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Truncated header.");
@ -317,6 +343,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
VP8ResetProba(&dec->proba_);
ResetSegmentHeader(&dec->segment_hdr_);
dec->segment_ = 0; // default for intra
}
// Check if we have all the partition #0 available, and initialize dec->br_
@ -327,7 +354,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
}
br = &dec->br_;
VP8InitBitReader(br, buf, frm_hdr->partition_length_);
VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_);
buf += frm_hdr->partition_length_;
buf_size -= frm_hdr->partition_length_;
@ -354,14 +381,63 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
// Frame buffer marking
if (!frm_hdr->key_frame_) {
// Paragraph 9.7
#ifndef ONLY_KEYFRAME_CODE
dec->buffer_flags_ = VP8Get(br) << 0; // update golden
dec->buffer_flags_ |= VP8Get(br) << 1; // update alt ref
if (!(dec->buffer_flags_ & 1)) {
dec->buffer_flags_ |= VP8GetValue(br, 2) << 2;
}
if (!(dec->buffer_flags_ & 2)) {
dec->buffer_flags_ |= VP8GetValue(br, 2) << 4;
}
dec->buffer_flags_ |= VP8Get(br) << 6; // sign bias golden
dec->buffer_flags_ |= VP8Get(br) << 7; // sign bias alt ref
#else
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Not a key frame.");
#endif
} else {
dec->buffer_flags_ = 0x003 | 0x100;
}
VP8Get(br); // ignore the value of update_proba_
// Paragraph 9.8
#ifndef ONLY_KEYFRAME_CODE
dec->update_proba_ = VP8Get(br);
if (!dec->update_proba_) { // save for later restore
dec->proba_saved_ = dec->proba_;
}
dec->buffer_flags_ &= 1 << 8;
dec->buffer_flags_ |=
(frm_hdr->key_frame_ || VP8Get(br)) << 8; // refresh last frame
#else
VP8Get(br); // just ignore the value of update_proba_
#endif
VP8ParseProba(br, dec);
#ifdef WEBP_EXPERIMENTAL_FEATURES
// Extensions
if (dec->pic_hdr_.colorspace_) {
const size_t kTrailerSize = 8;
const uint8_t kTrailerMarker = 0x01;
const uint8_t* ext_buf = buf - kTrailerSize;
size_t size;
if (frm_hdr->partition_length_ < kTrailerSize ||
ext_buf[kTrailerSize - 1] != kTrailerMarker) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"RIFF: Inconsistent extra information.");
}
// Layer
size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16);
dec->layer_data_size_ = size;
dec->layer_data_ = NULL; // will be set later
dec->layer_colorspace_ = ext_buf[3];
}
#endif
// sanitized state
dec->ready_ = 1;
return 1;
@ -370,6 +446,11 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const int kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};
static const uint8_t kCat3[] = { 173, 148, 140, 0 };
static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
@ -380,6 +461,9 @@ static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
typedef const uint8_t (*ProbaCtxArray)[NUM_PROBAS];
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v;
@ -413,19 +497,19 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
}
// Returns the position of the last non-zero coeff plus one
static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
// (and 0 if there's no coeff at all)
static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx];
// n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'.
const uint8_t* p = prob[n][ctx];
if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit.
return 0;
}
for (; n < 16; ++n) {
if (!VP8GetBit(br, p[0])) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs
p = prob[++n]->probas_[0];
if (n == 16) return 16;
}
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
const ProbaCtxArray p_ctx = prob[kBands[n + 1]];
if (!VP8GetBit(br, p[1])) {
p = p_ctx[0];
} else { // non zero coeff
int v;
if (!VP8GetBit(br, p[2])) {
v = 1;
@ -435,172 +519,205 @@ static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
p = p_ctx[2];
}
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
if (n < 15 && !VP8GetBit(br, p[0])) { // EOB
return n + 1;
}
}
}
return 16;
}
static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
nz_coeffs <<= 2;
nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
return nz_coeffs;
}
// Alias-safe way of converting 4 bytes to 32 bits.
typedef union {
uint8_t i8[4];
uint32_t i32;
} PackedNz;
static int ParseResiduals(VP8Decoder* const dec,
VP8MB* const mb, VP8BitReader* const token_br) {
const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_;
const VP8BandProbas* const * ac_proba;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
int16_t* dst = block->coeffs_;
// Table to unpack four bits into four bytes
static const PackedNz kUnpackTab[16] = {
{{0, 0, 0, 0}}, {{1, 0, 0, 0}}, {{0, 1, 0, 0}}, {{1, 1, 0, 0}},
{{0, 0, 1, 0}}, {{1, 0, 1, 0}}, {{0, 1, 1, 0}}, {{1, 1, 1, 0}},
{{0, 0, 0, 1}}, {{1, 0, 0, 1}}, {{0, 1, 0, 1}}, {{1, 1, 0, 1}},
{{0, 0, 1, 1}}, {{1, 0, 1, 1}}, {{0, 1, 1, 1}}, {{1, 1, 1, 1}} };
// Macro to pack the LSBs of four bytes into four bits.
#if defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || \
defined(__BIG_ENDIAN__)
#define PACK_CST 0x08040201U
#else
#define PACK_CST 0x01020408U
#endif
#define PACK(X, S) ((((X).i32 * PACK_CST) & 0xff000000) >> (S))
static void ParseResiduals(VP8Decoder* const dec,
VP8MB* const mb, VP8BitReader* const token_br) {
int out_t_nz, out_l_nz, first;
ProbaArray ac_prob;
const VP8QuantMatrix* q = &dec->dqm_[dec->segment_];
int16_t* dst = dec->coeffs_;
VP8MB* const left_mb = dec->mb_info_ - 1;
uint8_t tnz, lnz;
uint32_t non_zero_y = 0;
uint32_t non_zero_uv = 0;
PackedNz nz_ac, nz_dc;
PackedNz tnz, lnz;
uint32_t non_zero_ac = 0;
uint32_t non_zero_dc = 0;
int x, y, ch;
uint32_t out_t_nz, out_l_nz;
int first;
nz_dc.i32 = nz_ac.i32 = 0;
memset(dst, 0, 384 * sizeof(*dst));
if (!block->is_i4x4_) { // parse DC
if (!dec->is_i4x4_) { // parse DC
int16_t dc[16] = { 0 };
const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
if (nz > 1) { // more than just the DC -> perform the full transform
VP8TransformWHT(dc, dst);
} else { // only DC is non-zero -> inlined simplified transform
int i;
const int dc0 = (dc[0] + 3) >> 3;
for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
}
const int ctx = mb->dc_nz_ + left_mb->dc_nz_;
mb->dc_nz_ = left_mb->dc_nz_ =
(GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[1],
ctx, q->y2_mat_, 0, dc) > 0);
first = 1;
ac_proba = bands[0];
ac_prob = (ProbaArray)dec->proba_.coeffs_[0];
VP8TransformWHT(dc, dst);
} else {
first = 0;
ac_proba = bands[3];
ac_prob = (ProbaArray)dec->proba_.coeffs_[3];
}
tnz = mb->nz_ & 0x0f;
lnz = left_mb->nz_ & 0x0f;
tnz = kUnpackTab[mb->nz_ & 0xf];
lnz = kUnpackTab[left_mb->nz_ & 0xf];
for (y = 0; y < 4; ++y) {
int l = lnz & 1;
uint32_t nz_coeffs = 0;
int l = lnz.i8[y];
for (x = 0; x < 4; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
l = (nz > first);
tnz = (tnz >> 1) | (l << 7);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
const int ctx = l + tnz.i8[x];
const int nz = GetCoeffs(token_br, ac_prob, ctx,
q->y1_mat_, first, dst);
tnz.i8[x] = l = (nz > 0);
nz_dc.i8[x] = (dst[0] != 0);
nz_ac.i8[x] = (nz > 1);
dst += 16;
}
tnz >>= 4;
lnz = (lnz >> 1) | (l << 7);
non_zero_y = (non_zero_y << 8) | nz_coeffs;
lnz.i8[y] = l;
non_zero_dc |= PACK(nz_dc, 24 - y * 4);
non_zero_ac |= PACK(nz_ac, 24 - y * 4);
}
out_t_nz = tnz;
out_l_nz = lnz >> 4;
out_t_nz = PACK(tnz, 24);
out_l_nz = PACK(lnz, 24);
tnz = kUnpackTab[mb->nz_ >> 4];
lnz = kUnpackTab[left_mb->nz_ >> 4];
for (ch = 0; ch < 4; ch += 2) {
uint32_t nz_coeffs = 0;
tnz = mb->nz_ >> (4 + ch);
lnz = left_mb->nz_ >> (4 + ch);
for (y = 0; y < 2; ++y) {
int l = lnz & 1;
int l = lnz.i8[ch + y];
for (x = 0; x < 2; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
l = (nz > 0);
tnz = (tnz >> 1) | (l << 3);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
const int ctx = l + tnz.i8[ch + x];
const int nz =
GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2],
ctx, q->uv_mat_, 0, dst);
tnz.i8[ch + x] = l = (nz > 0);
nz_dc.i8[y * 2 + x] = (dst[0] != 0);
nz_ac.i8[y * 2 + x] = (nz > 1);
dst += 16;
}
tnz >>= 2;
lnz = (lnz >> 1) | (l << 5);
lnz.i8[ch + y] = l;
}
// Note: we don't really need the per-4x4 details for U/V blocks.
non_zero_uv |= nz_coeffs << (4 * ch);
out_t_nz |= (tnz << 4) << ch;
out_l_nz |= (lnz & 0xf0) << ch;
non_zero_dc |= PACK(nz_dc, 8 - ch * 2);
non_zero_ac |= PACK(nz_ac, 8 - ch * 2);
}
out_t_nz |= PACK(tnz, 20);
out_l_nz |= PACK(lnz, 20);
mb->nz_ = out_t_nz;
left_mb->nz_ = out_l_nz;
block->non_zero_y_ = non_zero_y;
block->non_zero_uv_ = non_zero_uv;
// We look at the mode-code of each block and check if some blocks have less
// than three non-zero coeffs (code < 2). This is to avoid dithering flat and
// empty blocks.
block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
return !(non_zero_y | non_zero_uv); // will be used for further optimization
dec->non_zero_ac_ = non_zero_ac;
dec->non_zero_ = non_zero_ac | non_zero_dc;
mb->skip_ = !dec->non_zero_;
}
#undef PACK
//------------------------------------------------------------------------------
// Main loop
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
VP8BitReader* const br = &dec->br_;
VP8MB* const left = dec->mb_info_ - 1;
VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
int skip = dec->use_skip_proba_ ? block->skip_ : 0;
VP8MB* const info = dec->mb_info_ + dec->mb_x_;
if (!skip) {
skip = ParseResiduals(dec, mb, token_br);
// Note: we don't save segment map (yet), as we don't expect
// to decode more than 1 keyframe.
if (dec->segment_hdr_.update_map_) {
// Hardcoded tree parsing
dec->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) ?
VP8GetBit(br, dec->proba_.segments_[1]) :
2 + VP8GetBit(br, dec->proba_.segments_[2]);
}
info->skip_ = dec->use_skip_proba_ ? VP8GetBit(br, dec->skip_p_) : 0;
VP8ParseIntraMode(br, dec);
if (br->eof_) {
return 0;
}
if (!info->skip_) {
ParseResiduals(dec, info, token_br);
} else {
left->nz_ = mb->nz_ = 0;
if (!block->is_i4x4_) {
left->nz_dc_ = mb->nz_dc_ = 0;
left->nz_ = info->nz_ = 0;
if (!dec->is_i4x4_) {
left->dc_nz_ = info->dc_nz_ = 0;
}
block->non_zero_y_ = 0;
block->non_zero_uv_ = 0;
block->dither_ = 0;
dec->non_zero_ = 0;
dec->non_zero_ac_ = 0;
}
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
finfo->f_inner_ |= !skip;
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
}
return !token_br->eof_;
return (!token_br->eof_);
}
void VP8InitScanline(VP8Decoder* const dec) {
VP8MB* const left = dec->mb_info_ - 1;
left->nz_ = 0;
left->nz_dc_ = 0;
left->dc_nz_ = 0;
memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
dec->mb_x_ = 0;
dec->filter_row_ =
(dec->filter_type_ > 0) &&
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
}
static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
// Parse bitstream for this row.
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-partition0 encountered.");
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
VP8InitScanline(dec);
for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) {
if (!VP8DecodeMB(dec, token_br)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered.");
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
}
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
}
}
if (dec->mt_method_ > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) {
return 0;
}
// Finish
#ifndef ONLY_KEYFRAME_CODE
if (!dec->update_proba_) {
dec->proba_ = dec->proba_saved_;
}
#endif
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (dec->layer_data_size_ > 0) {
if (!VP8DecodeLayer(dec)) {
return 0;
}
}
#endif
return 1;
}
@ -648,10 +765,12 @@ void VP8Clear(VP8Decoder* const dec) {
if (dec == NULL) {
return;
}
WebPGetWorkerInterface()->End(&dec->worker_);
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
WebPSafeFree(dec->mem_);
if (dec->use_threads_) {
WebPWorkerEnd(&dec->worker_);
}
if (dec->mem_) {
free(dec->mem_);
}
dec->mem_ = NULL;
dec->mem_size_ = 0;
memset(&dec->br_, 0, sizeof(dec->br_));
@ -660,3 +779,6 @@ void VP8Clear(VP8Decoder* const dec) {
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
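
The older ParseResiduals above condenses the least-significant bits of four flag bytes into one 4-bit nibble with a single multiplication: the constant 0x01020408 shifts each byte's LSB to a distinct bit of the product's top byte. A small portable demo of the same arithmetic, building the 32-bit word explicitly instead of through the PackedNz union so endianness does not matter (names are illustrative):

#include <stdint.h>
#include <stdio.h>

// Pack the LSBs of four 0/1 bytes into bits 0..3 using one multiply.
// Byte k's LSB lands on bit (24 + k) of the product; the top byte is the nibble.
static uint32_t Pack4(const uint8_t b[4]) {
  const uint32_t x = (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
                     ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
  return ((x * 0x01020408u) & 0xff000000u) >> 24;
}

int main(void) {
  const uint8_t nz[4] = { 1, 0, 1, 1 };
  printf("0x%x\n", Pack4(nz));   // prints 0xd: bits 0, 2, 3 set, bit 1 clear
  return 0;
}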

View File

@ -15,14 +15,12 @@
#define WEBP_DEC_VP8I_H_
#include <string.h> // for memcpy()
#include "./common.h"
#include "./vp8li.h"
#include "../utils/bit_reader.h"
#include "../utils/random.h"
#include "../utils/thread.h"
#include "../dsp/dsp.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -31,10 +29,48 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 5
#define DEC_REV_VERSION 0
#define DEC_MIN_VERSION 3
#define DEC_REV_VERSION 1
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED,
B_VE_PRED,
B_HE_PRED,
B_RD_PRED,
B_VR_PRED,
B_LD_PRED,
B_VL_PRED,
B_HD_PRED,
B_HU_PRED,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
B_PRED = NUM_BMODES, // refined I4x4 mode
// special modes
B_DC_PRED_NOTOP = 4,
B_DC_PRED_NOLEFT = 5,
B_DC_PRED_NOTOPLEFT = 6,
NUM_B_DC_MODES = 7 };
enum { MB_FEATURE_TREE_PROBS = 3,
NUM_MB_SEGMENTS = 4,
NUM_REF_LF_DELTAS = 4,
NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
MAX_NUM_PARTITIONS = 8,
// Probabilities
NUM_TYPES = 4,
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11,
NUM_MV_PROBAS = 19 };
// YUV-cache parameters.
// Constraints are: We need to store one 16x16 block of luma samples (y),
// and two 8x8 chroma blocks (u/v). These had better be 16-byte aligned,
// in order to be SIMD-friendly. We also need to store the top, left and
@ -56,15 +92,14 @@ extern "C" {
// 'y' = y-samples   'u' = u-samples   'v' = v-samples
// '|' = left sample, '-' = top sample, '+' = top-left sample
// 't' = extra top-right sample for 4x4 modes
// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size.
#define BPS 32 // this is the common stride used by yuv[]
#define YUV_SIZE (BPS * 17 + BPS * 9)
#define Y_SIZE (BPS * 17)
#define Y_OFF (BPS * 1 + 8)
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
// minimal width under which lossy multi-threading is always disabled
#define MIN_WIDTH_FOR_THREADS 512
//------------------------------------------------------------------------------
// Headers
@ -93,19 +128,15 @@ typedef struct {
int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments
} VP8SegmentHeader;
// probas associated to one of the contexts
typedef uint8_t VP8ProbaArray[NUM_PROBAS];
typedef struct { // all the probas associated to one band
VP8ProbaArray probas_[NUM_CTX];
} VP8BandProbas;
// Struct collecting all frame-persistent probabilities.
typedef struct {
uint8_t segments_[MB_FEATURE_TREE_PROBS];
// Type: 0:Intra16-AC 1:Intra16-DC 2:Chroma 3:Intra4
VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
uint8_t coeffs_[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
#ifndef ONLY_KEYFRAME_CODE
uint8_t ymode_[4], uvmode_[3];
uint8_t mv_[2][NUM_MV_PROBAS];
#endif
} VP8Proba;
// Filter parameters
@ -122,61 +153,32 @@ typedef struct {
// Information about the macroblocks.
typedef struct { // filter specs
uint8_t f_limit_; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel_; // inner limit in [1..63]
uint8_t f_inner_; // do inner filtering?
uint8_t hev_thresh_; // high edge variance threshold in [0..2]
unsigned int f_level_:6; // filter strength: 0..63
unsigned int f_ilevel_:6; // inner limit: 1..63
unsigned int f_inner_:1; // do inner filtering?
} VP8FInfo;
typedef struct { // Top/Left Contexts used for syntax-parsing
uint8_t nz_; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc_; // non-zero DC coeff (1bit)
typedef struct { // used for syntax-parsing
unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
unsigned int dc_nz_:1; // non-zero DC coeffs
unsigned int skip_:1; // block type
} VP8MB;
// Dequantization matrices
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
quant_t y1_mat_, y2_mat_, uv_mat_;
int uv_quant_; // U/V quantizer value
int dither_; // dithering amplitude (0 = off, max=255)
} VP8QuantMatrix;
// Data needed to reconstruct a macroblock
typedef struct {
int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4_; // true if intra4x4
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode_; // chroma prediction mode
// bit-wise info about the content of each sub-4x4 block (in decoding order).
// Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
// code=0 -> no coefficient
// code=1 -> only DC
// code=2 -> first three coefficients are non-zero
// code=3 -> more than three coefficients are non-zero
// This allows calling specialized transform functions.
uint32_t non_zero_y_;
uint32_t non_zero_uv_;
uint8_t dither_; // local dithering strength (deduced from non_zero_*)
uint8_t skip_;
uint8_t segment_;
} VP8MBData;
// Persistent information needed by the parallel processing
typedef struct {
int id_; // cache row to process (in [0..2])
int mb_y_; // macroblock position of the row
int filter_row_; // true if row-filtering is needed
VP8FInfo* f_info_; // filter strengths (swapped with dec->f_info_)
VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_)
VP8Io io_; // copy of the VP8Io to pass to put()
int id_; // cache row to process (in [0..2])
int mb_y_; // macroblock position of the row
int filter_row_; // true if row-filtering is needed
VP8FInfo* f_info_; // filter strengths
VP8Io io_; // copy of the VP8Io to pass to put()
} VP8ThreadContext;
// Saved top samples, per macroblock. Fits into a cache-line.
typedef struct {
uint8_t y[16], u[8], v[8];
} VP8TopSamples;
//------------------------------------------------------------------------------
// VP8Decoder: the main opaque structure handed over to user
@ -196,8 +198,7 @@ struct VP8Decoder {
// Worker
WebPWorker worker_;
int mt_method_; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int use_threads_; // use multi-thread
int cache_id_; // current cache row
int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
VP8ThreadContext thread_ctx_; // Thread context
@ -214,9 +215,12 @@ struct VP8Decoder {
// per-partition boolean decoders.
VP8BitReader parts_[MAX_NUM_PARTITIONS];
// Dithering strength, deduced from decoding options
int dither_; // whether to use dithering or not
VP8Random dithering_rg_; // random generator for dithering
// buffer refresh flags
// bit 0: refresh Gold, bit 1: refresh Alt
// bit 2-3: copy to Gold, bit 4-5: copy to Alt
// bit 6: Gold sign bias, bit 7: Alt sign bias
// bit 8: refresh last frame
uint32_t buffer_flags_;
// dequantization (one set of DC/AC dequant factor per segment)
VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
@ -225,18 +229,24 @@ struct VP8Decoder {
VP8Proba proba_;
int use_skip_proba_;
uint8_t skip_p_;
#ifndef ONLY_KEYFRAME_CODE
uint8_t intra_p_, last_p_, golden_p_;
VP8Proba proba_saved_;
int update_proba_;
#endif
// Boundary data cache and persistent buffers.
uint8_t* intra_t_; // top intra modes values: 4 * mb_w_
uint8_t intra_l_[4]; // left intra modes values
uint8_t* intra_t_; // top intra modes values: 4 * mb_w_
uint8_t intra_l_[4]; // left intra modes values
uint8_t* y_t_; // top luma samples: 16 * mb_w_
uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each
VP8TopSamples* yuv_t_; // top y/u/v samples
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
VP8FInfo* f_info_; // filter strength info
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
VP8FInfo* f_info_; // filter strength info
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
uint8_t* cache_u_;
uint8_t* cache_v_;
int cache_y_stride_;
@ -248,19 +258,32 @@ struct VP8Decoder {
// Per macroblock non-persistent infos.
int mb_x_, mb_y_; // current position, in macroblock units
VP8MBData* mb_data_; // parsed reconstruction data
uint8_t is_i4x4_; // true if intra4x4
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode_; // chroma prediction mode
uint8_t segment_; // block's segment
// bit-wise info about the content of each sub-4x4 block: there are 16 bits
// for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
// chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
// If the bit is set, the 4x4 block contains some non-zero coefficients.
uint32_t non_zero_;
uint32_t non_zero_ac_;
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// Alpha
struct ALPHDecoder* alph_dec_; // alpha-plane decoder object
const uint8_t* alpha_data_; // compressed alpha data (if present)
// extensions
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
int alpha_dithering_; // derived from decoding options (0=off, 100=full).
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
int layer_colorspace_;
const uint8_t* layer_data_; // compressed layer data (if present)
size_t layer_data_size_;
};
//------------------------------------------------------------------------------
@ -273,14 +296,15 @@ int VP8SetError(VP8Decoder* const dec,
// in tree.c
void VP8ResetProba(VP8Proba* const proba);
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
// parses one row of intra mode data in partition 0, returns !eof
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec);
void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec);
// in quant.c
void VP8ParseQuant(VP8Decoder* const dec);
// in frame.c
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io);
int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
// Predict a block and add residual
void VP8ReconstructBlock(VP8Decoder* const dec);
// Call io->setup() and finish setting up scan parameters.
// After this call returns, one must always call VP8ExitCritical() with the
// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
@ -289,15 +313,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
// Must always be called in pair with VP8EnterCritical().
// Returns false in case of error.
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Return the multi-threading method to use (0=off), depending
// on options and bitstream size. Only for lossy decoding.
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height);
// Initialize dithering post-process if needed.
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec);
// Process the last decoded row (filtering + output).
// Process the last decoded row (filtering + output)
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec);
@ -308,9 +324,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
int row, int num_rows);
// in layer.c
int VP8DecodeLayer(VP8Decoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
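
The BPS/Y_OFF/U_OFF/V_OFF constants above describe one scratch buffer holding the 16x16 luma block and both 8x8 chroma blocks together with their top/left border samples, all at a common 32-byte stride. The values are easy to verify by hand; this throwaway snippet just evaluates the same expressions:

#include <stdio.h>

#define BPS      32                    // bytes per scan-line of the scratch area
#define YUV_SIZE (BPS * 17 + BPS * 9)  // 17 rows for Y, 9 rows for U+V
#define Y_OFF    (BPS * 1 + 8)         // luma starts after one border row + 8 cols
#define U_OFF    (Y_OFF + BPS * 16 + BPS)
#define V_OFF    (U_OFF + 16)          // V sits next to U on the same rows

int main(void) {
  printf("YUV_SIZE=%d Y_OFF=%d U_OFF=%d V_OFF=%d\n",
         YUV_SIZE, Y_OFF, U_OFF, V_OFF);
  // prints: YUV_SIZE=832 Y_OFF=40 U_OFF=584 V_OFF=600
  return 0;
}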

File diff suppressed because it is too large

View File

@ -20,8 +20,9 @@
#include "../utils/bit_reader.h"
#include "../utils/color_cache.h"
#include "../utils/huffman.h"
#include "../webp/format_constants.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -40,10 +41,13 @@ struct VP8LTransform {
uint32_t *data_; // transform data.
};
typedef struct {
HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
} HTreeGroup;
typedef struct {
int color_cache_size_;
VP8LColorCache color_cache_;
VP8LColorCache saved_color_cache_; // for incremental
int huffman_mask_;
int huffman_subsample_bits_;
@ -51,12 +55,11 @@ typedef struct {
uint32_t *huffman_image_;
int num_htree_groups_;
HTreeGroup *htree_groups_;
HuffmanCode *huffman_tables_;
} VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder;
struct VP8LDecoder {
typedef struct {
VP8StatusCode status_;
VP8LDecodeState action_;
VP8LDecodeState state_;
VP8Io *io_;
@ -67,16 +70,10 @@ struct VP8LDecoder {
uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_;
int incremental_; // if true, incremental decoding is expected
VP8LBitReader saved_br_; // note: could be local variables too
int saved_last_pixel_;
int width_;
int height_;
int last_row_; // last input row decoded so far.
int last_pixel_; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row_; // last row output so far.
VP8LMetadata hdr_;
@ -88,27 +85,18 @@ struct VP8LDecoder {
uint8_t *rescaler_memory; // Working memory for rescaling work.
WebPRescaler *rescaler; // Common rescaler for all channels.
};
} VP8LDecoder;
//------------------------------------------------------------------------------
// internal functions. Not public.
struct ALPHDecoder; // Defined in dec/alphai.h.
// in vp8l.c
// Decodes image header for alpha data stored using lossless compression.
// Returns false in case of error.
int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
const uint8_t* const data, size_t data_size,
uint8_t* const output);
// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
// already decoded in previous call(s), it will resume decoding from where it
// was paused.
// Returns false in case of bitstream error.
int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec,
int last_row);
// Decodes a raw image stream (without header) and store the alpha data
// into *output, which must be of size width x height. Returns false in case
// of error.
int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
size_t data_size, uint8_t* const output);
// Allocates and initialize a new lossless decoder instance.
VP8LDecoder* VP8LNew(void);
@ -129,7 +117,7 @@ void VP8LDelete(VP8LDecoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -16,9 +16,12 @@
#include "./vp8i.h"
#include "./vp8li.h"
#include "./webpi.h"
#include "../utils/utils.h"
#include "../webp/mux_types.h" // ALPHA_FLAG
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// RIFF layout is:
// Offset tag
@ -44,15 +47,22 @@
// All sizes are in little-endian order.
// Note: chunk data size must be padded to multiple of 2 when written.
static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) {
return data[0] | (data[1] << 8) | (data[2] << 16);
}
static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) {
return (uint32_t)get_le24(data) | (data[3] << 24);
}
// Validates the RIFF container (if detected) and skips over it.
// If a RIFF container is detected, returns:
// VP8_STATUS_BITSTREAM_ERROR for invalid header,
// VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true,
// and VP8_STATUS_OK otherwise.
// If a RIFF container is detected,
// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and
// VP8_STATUS_OK otherwise.
// In case there are not enough bytes (partial RIFF container), return 0 for
// *riff_size. Else return the RIFF size extracted from the header.
static VP8StatusCode ParseRIFF(const uint8_t** const data,
size_t* const data_size, int have_all_data,
size_t* const data_size,
size_t* const riff_size) {
assert(data != NULL);
assert(data_size != NULL);
@ -63,7 +73,7 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
if (memcmp(*data + 8, "WEBP", TAG_SIZE)) {
return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature.
} else {
const uint32_t size = GetLE32(*data + TAG_SIZE);
const uint32_t size = get_le32(*data + TAG_SIZE);
// Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn").
if (size < TAG_SIZE + CHUNK_HEADER_SIZE) {
return VP8_STATUS_BITSTREAM_ERROR;
@ -71,9 +81,6 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
if (size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
}
// We have a RIFF container. Skip it.
*riff_size = size;
*data += RIFF_HEADER_SIZE;
@ -109,7 +116,7 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data,
if (!memcmp(*data, "VP8X", TAG_SIZE)) {
int width, height;
uint32_t flags;
const uint32_t chunk_size = GetLE32(*data + TAG_SIZE);
const uint32_t chunk_size = get_le32(*data + TAG_SIZE);
if (chunk_size != VP8X_CHUNK_SIZE) {
return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size.
}
@ -118,9 +125,9 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data,
if (*data_size < vp8x_size) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
}
flags = GetLE32(*data + 8);
width = 1 + GetLE24(*data + 12);
height = 1 + GetLE24(*data + 15);
flags = get_le32(*data + 8);
width = 1 + get_le24(*data + 12);
height = 1 + get_le24(*data + 15);
if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
return VP8_STATUS_BITSTREAM_ERROR; // image is too large
}
@ -174,7 +181,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
return VP8_STATUS_NOT_ENOUGH_DATA;
}
chunk_size = GetLE32(buf + TAG_SIZE);
chunk_size = get_le32(buf + TAG_SIZE);
if (chunk_size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
@ -220,8 +227,9 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
// extracted from the VP8/VP8L chunk header.
// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
size_t* const data_size, int have_all_data,
size_t riff_size, size_t* const chunk_size,
size_t* const data_size,
size_t riff_size,
size_t* const chunk_size,
int* const is_lossless) {
const uint8_t* const data = *data_ptr;
const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
@ -240,13 +248,10 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
if (is_vp8 || is_vp8l) {
// Bitstream contains VP8/VP8L header.
const uint32_t size = GetLE32(data + TAG_SIZE);
const uint32_t size = get_le32(data + TAG_SIZE);
if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information.
}
if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
}
// Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
*chunk_size = size;
*data_ptr += CHUNK_HEADER_SIZE;
@ -280,18 +285,9 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
int* const height,
int* const has_alpha,
int* const has_animation,
int* const format,
WebPHeaderStructure* const headers) {
int canvas_width = 0;
int canvas_height = 0;
int image_width = 0;
int image_height = 0;
int found_riff = 0;
int found_vp8x = 0;
int animation_present = 0;
int fragments_present = 0;
const int have_all_data = (headers != NULL) ? headers->have_all_data : 0;
VP8StatusCode status;
WebPHeaderStructure hdrs;
@ -303,7 +299,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
hdrs.data_size = data_size;
// Skip over RIFF header.
status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
status = ParseRIFF(&data, &data_size, &hdrs.riff_size);
if (status != VP8_STATUS_OK) {
return status; // Wrong RIFF header / insufficient data.
}
@ -312,35 +308,23 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
// Skip over VP8X.
{
uint32_t flags = 0;
status = ParseVP8X(&data, &data_size, &found_vp8x,
&canvas_width, &canvas_height, &flags);
status = ParseVP8X(&data, &data_size, &found_vp8x, width, height, &flags);
if (status != VP8_STATUS_OK) {
return status; // Wrong VP8X / insufficient data.
}
animation_present = !!(flags & ANIMATION_FLAG);
fragments_present = !!(flags & FRAGMENTS_FLAG);
if (!found_riff && found_vp8x) {
// Note: This restriction may be removed in the future, if it becomes
// necessary to send VP8X chunk to the decoder.
return VP8_STATUS_BITSTREAM_ERROR;
}
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = animation_present;
if (format != NULL) *format = 0; // default = undefined
image_width = canvas_width;
image_height = canvas_height;
if (found_vp8x && (animation_present || fragments_present) &&
headers == NULL) {
status = VP8_STATUS_OK;
goto ReturnWidthHeight; // Just return features from VP8X header.
if (has_animation != NULL) *has_animation = !!(flags & ANIMATION_FLAG);
if (found_vp8x && headers == NULL) {
return VP8_STATUS_OK; // Return features from VP8X header.
}
}
if (data_size < TAG_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
}
if (data_size < TAG_SIZE) return VP8_STATUS_NOT_ENOUGH_DATA;
// Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
if ((found_riff && found_vp8x) ||
@ -348,49 +332,43 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
&hdrs.alpha_data, &hdrs.alpha_data_size);
if (status != VP8_STATUS_OK) {
goto ReturnWidthHeight; // Invalid chunk size / insufficient data.
return status; // Found an invalid chunk size / insufficient data.
}
}
// Skip over VP8/VP8L header.
status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size,
status = ParseVP8Header(&data, &data_size, hdrs.riff_size,
&hdrs.compressed_size, &hdrs.is_lossless);
if (status != VP8_STATUS_OK) {
goto ReturnWidthHeight; // Wrong VP8/VP8L chunk-header / insufficient data.
return status; // Wrong VP8/VP8L chunk-header / insufficient data.
}
if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (format != NULL && !(animation_present || fragments_present)) {
*format = hdrs.is_lossless ? 2 : 1;
}
if (!hdrs.is_lossless) {
if (data_size < VP8_FRAME_HEADER_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
return VP8_STATUS_NOT_ENOUGH_DATA;
}
// Validates raw VP8 data.
if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size,
&image_width, &image_height)) {
if (!VP8GetInfo(data, data_size,
(uint32_t)hdrs.compressed_size, width, height)) {
return VP8_STATUS_BITSTREAM_ERROR;
}
} else {
if (data_size < VP8L_FRAME_HEADER_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
return VP8_STATUS_NOT_ENOUGH_DATA;
}
// Validates raw VP8L data.
if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) {
if (!VP8LGetInfo(data, data_size, width, height, has_alpha)) {
return VP8_STATUS_BITSTREAM_ERROR;
}
}
// Validates image size coherency.
if (found_vp8x) {
if (canvas_width != image_width || canvas_height != image_height) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (has_alpha != NULL) {
// If the data did not contain a VP8X/VP8L chunk the only definitive way
// to set this is by looking for alpha data (from an ALPH chunk).
*has_alpha |= (hdrs.alpha_data != NULL);
}
if (headers != NULL) {
*headers = hdrs;
@ -398,20 +376,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
assert(headers->offset == headers->data_size - data_size);
}
ReturnWidthHeight:
if (status == VP8_STATUS_OK ||
(status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
if (has_alpha != NULL) {
// If the data did not contain a VP8X/VP8L chunk the only definitive way
// to set this is by looking for alpha data (from an ALPH chunk).
*has_alpha |= (hdrs.alpha_data != NULL);
}
if (width != NULL) *width = image_width;
if (height != NULL) *height = image_height;
return VP8_STATUS_OK;
} else {
return status;
}
return VP8_STATUS_OK; // Return features from VP8 header.
}
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
@ -420,8 +385,7 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, &has_animation,
NULL, headers);
NULL, NULL, NULL, &has_animation, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// TODO(jzern): full support of animation frames will require API additions.
if (has_animation) {
@ -435,7 +399,7 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
// WebPDecParams
void WebPResetDecParams(WebPDecParams* const params) {
if (params != NULL) {
if (params) {
memset(params, 0, sizeof(*params));
}
}
@ -452,7 +416,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
headers.data = data;
headers.data_size = data_size;
headers.have_all_data = 1;
status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
if (status != VP8_STATUS_OK) {
return status;
@ -469,6 +432,11 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
#ifdef WEBP_USE_THREAD
dec->use_threads_ = params->options && (params->options->use_threads > 0);
#else
dec->use_threads_ = 0;
#endif
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
@ -480,10 +448,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
// This change must be done before calling VP8Decode()
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
VP8InitDithering(params->options, dec);
if (!VP8Decode(dec, &io)) {
status = dec->status_;
}
@ -513,10 +477,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
if (status != VP8_STATUS_OK) {
WebPFreeDecBuffer(params->output);
}
if (params->options != NULL && params->options->flip) {
status = WebPFlipBuffer(params->output);
}
return status;
}
@ -674,6 +634,7 @@ uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
static void DefaultFeatures(WebPBitstreamFeatures* const features) {
assert(features != NULL);
memset(features, 0, sizeof(*features));
features->bitstream_version = 0;
}
static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
@ -687,7 +648,7 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
return ParseHeadersInternal(data, data_size,
&features->width, &features->height,
&features->has_alpha, &features->has_animation,
&features->format, NULL);
NULL);
}
//------------------------------------------------------------------------------
@ -781,9 +742,9 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
h = options->crop_height;
x = options->crop_left;
y = options->crop_top;
if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420
if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 or YUV422
x &= ~1;
y &= ~1;
y &= ~1; // TODO(later): only for YUV420, not YUV422.
}
if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
return 0; // out of frame boundary error
@ -799,13 +760,11 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
// Scaling
io->use_scaling = (options != NULL) && (options->use_scaling > 0);
if (io->use_scaling) {
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) {
if (options->scaled_width <= 0 || options->scaled_height <= 0) {
return 0;
}
io->scaled_width = scaled_width;
io->scaled_height = scaled_height;
io->scaled_width = options->scaled_width;
io->scaled_height = options->scaled_height;
}
// Filter
@ -827,3 +786,6 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -14,7 +14,7 @@
#ifndef WEBP_DEC_WEBPI_H_
#define WEBP_DEC_WEBPI_H_
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -26,10 +26,7 @@ extern "C" {
typedef struct WebPDecParams WebPDecParams;
typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
typedef int (*OutputAlphaFunc)(const VP8Io* const io, WebPDecParams* const p,
int expected_num_out_lines);
typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
int max_out_lines);
typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos);
struct WebPDecParams {
WebPDecBuffer* output; // output buffer.
@ -43,7 +40,7 @@ struct WebPDecParams {
void* memory; // overall scratch memory for the output work.
OutputFunc emit; // output RGB or YUV samples
OutputAlphaFunc emit_alpha; // output alpha channel
OutputFunc emit_alpha; // output alpha channel
OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values
};
@ -57,7 +54,6 @@ void WebPResetDecParams(WebPDecParams* const params);
typedef struct {
const uint8_t* data; // input buffer
size_t data_size; // input buffer size
int have_all_data; // true if all data is known to be available
size_t offset; // offset to main data chunk (VP8 or VP8L)
const uint8_t* alpha_data; // points to alpha chunk (if present)
size_t alpha_data_size; // alpha chunk size
@ -97,15 +93,10 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
// dimension / etc.). If *options is not NULL, also verify that the options'
// parameters are valid and apply them to the width/height dimensions of the
// output buffer. This takes cropping / scaling / rotation into account.
// Also incorporates the options->flip flag to flip the buffer parameters if
// needed.
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
const WebPDecoderOptions* const options,
WebPDecBuffer* const buffer);
// Flip buffer vertically by negating the various strides.
VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
// memory (still held by 'src').
void WebPCopyDecBuffer(const WebPDecBuffer* const src,
@ -114,9 +105,11 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
//------------------------------------------------------------------------------
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,7 +1,8 @@
AM_CPPFLAGS = -I$(top_srcdir)/src
lib_LTLIBRARIES = libwebpdemux.la
libwebpdemux_la_SOURCES =
libwebpdemux_la_SOURCES += anim_decode.c demux.c
libwebpdemux_la_SOURCES += demux.c
libwebpdemuxinclude_HEADERS =
libwebpdemuxinclude_HEADERS += ../webp/demux.h
@ -9,6 +10,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
libwebpdemuxinclude_HEADERS += ../webp/types.h
libwebpdemux_la_LIBADD = ../libwebp.la
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:0:0
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 0:1:0
libwebpdemuxincludedir = $(includedir)/webp
pkgconfig_DATA = libwebpdemux.pc

View File

@ -1,442 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// AnimDecoder implementation.
//
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#endif
#include <assert.h>
#include <string.h>
#include "../utils/utils.h"
#include "../webp/decode.h"
#include "../webp/demux.h"
#define NUM_CHANNELS 4
typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
static void BlendPixelRowNonPremult(uint32_t* const src,
const uint32_t* const dst, int num_pixels);
static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
int num_pixels);
struct WebPAnimDecoder {
WebPDemuxer* demux_; // Demuxer created from given WebP bitstream.
WebPDecoderConfig config_; // Decoder config.
// Note: we use a pointer to a function blending multiple pixels at a time to
// allow possible inlining of the per-pixel blending function.
BlendRowFunc blend_func_; // Pointer to the chosen blend row function.
WebPAnimInfo info_; // Global info about the animation.
uint8_t* curr_frame_; // Current canvas (not disposed).
uint8_t* prev_frame_disposed_; // Previous canvas (properly disposed).
int prev_frame_timestamp_; // Previous frame timestamp (milliseconds).
WebPIterator prev_iter_; // Iterator object for previous frame.
int prev_frame_was_keyframe_; // True if previous frame was a keyframe.
int next_frame_; // Index of the next frame to be decoded
// (starting from 1).
};
static void DefaultDecoderOptions(WebPAnimDecoderOptions* const dec_options) {
dec_options->color_mode = MODE_RGBA;
dec_options->use_threads = 0;
}
int WebPAnimDecoderOptionsInitInternal(WebPAnimDecoderOptions* dec_options,
int abi_version) {
if (dec_options == NULL ||
WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
return 0;
}
DefaultDecoderOptions(dec_options);
return 1;
}
static int ApplyDecoderOptions(const WebPAnimDecoderOptions* const dec_options,
WebPAnimDecoder* const dec) {
WEBP_CSP_MODE mode;
WebPDecoderConfig* config = &dec->config_;
assert(dec_options != NULL);
mode = dec_options->color_mode;
if (mode != MODE_RGBA && mode != MODE_BGRA &&
mode != MODE_rgbA && mode != MODE_bgrA) {
return 0;
}
dec->blend_func_ = (mode == MODE_RGBA || mode == MODE_BGRA)
? &BlendPixelRowNonPremult
: &BlendPixelRowPremult;
WebPInitDecoderConfig(config);
config->output.colorspace = mode;
config->output.is_external_memory = 1;
config->options.use_threads = dec_options->use_threads;
// Note: config->output.u.RGBA is set at the time of decoding each frame.
return 1;
}
WebPAnimDecoder* WebPAnimDecoderNewInternal(
const WebPData* webp_data, const WebPAnimDecoderOptions* dec_options,
int abi_version) {
WebPAnimDecoderOptions options;
WebPAnimDecoder* dec = NULL;
if (webp_data == NULL ||
WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
return NULL;
}
// Note: calloc() so that the pointer members are initialized to NULL.
dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
if (dec == NULL) goto Error;
if (dec_options != NULL) {
options = *dec_options;
} else {
DefaultDecoderOptions(&options);
}
if (!ApplyDecoderOptions(&options, dec)) goto Error;
dec->demux_ = WebPDemux(webp_data);
if (dec->demux_ == NULL) goto Error;
dec->info_.canvas_width = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_WIDTH);
dec->info_.canvas_height = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_HEIGHT);
dec->info_.loop_count = WebPDemuxGetI(dec->demux_, WEBP_FF_LOOP_COUNT);
dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
{
const int canvas_bytes =
dec->info_.canvas_width * NUM_CHANNELS * dec->info_.canvas_height;
// Note: calloc() because we fill frame with zeroes as well.
dec->curr_frame_ = WebPSafeCalloc(1ULL, canvas_bytes);
if (dec->curr_frame_ == NULL) goto Error;
dec->prev_frame_disposed_ = WebPSafeCalloc(1ULL, canvas_bytes);
if (dec->prev_frame_disposed_ == NULL) goto Error;
}
WebPAnimDecoderReset(dec);
return dec;
Error:
WebPAnimDecoderDelete(dec);
return NULL;
}
int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info) {
if (dec == NULL || info == NULL) return 0;
*info = dec->info_;
return 1;
}
// Returns true if the frame covers the full canvas.
static int IsFullFrame(int width, int height, int canvas_width,
int canvas_height) {
return (width == canvas_width && height == canvas_height);
}
// Clear the canvas to transparent.
static void ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
uint32_t canvas_height) {
memset(buf, 0, canvas_width * NUM_CHANNELS * canvas_height);
}
// Clear given frame rectangle to transparent.
static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
int y_offset, int width, int height) {
int j;
assert(width * NUM_CHANNELS <= buf_stride);
buf += y_offset * buf_stride + x_offset * NUM_CHANNELS;
for (j = 0; j < height; ++j) {
memset(buf, 0, width * NUM_CHANNELS);
buf += buf_stride;
}
}
// Copy width * height pixels from 'src' to 'dst'.
static void CopyCanvas(const uint8_t* src, uint8_t* dst,
uint32_t width, uint32_t height) {
assert(src != NULL && dst != NULL);
memcpy(dst, src, width * NUM_CHANNELS * height);
}
// Returns true if the current frame is a key-frame.
static int IsKeyFrame(const WebPIterator* const curr,
const WebPIterator* const prev,
int prev_frame_was_key_frame,
int canvas_width, int canvas_height) {
if (curr->frame_num == 1) {
return 1;
} else if ((!curr->has_alpha || curr->blend_method == WEBP_MUX_NO_BLEND) &&
IsFullFrame(curr->width, curr->height,
canvas_width, canvas_height)) {
return 1;
} else {
return (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) &&
(IsFullFrame(prev->width, prev->height, canvas_width,
canvas_height) ||
prev_frame_was_key_frame);
}
}
// Blend a single channel of 'src' over 'dst', given their alpha channel values.
// 'src' and 'dst' are assumed to be NOT pre-multiplied by alpha.
static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
uint32_t dst, uint8_t dst_a,
uint32_t scale, int shift) {
const uint8_t src_channel = (src >> shift) & 0xff;
const uint8_t dst_channel = (dst >> shift) & 0xff;
const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
assert(blend_unscaled < (1ULL << 32) / scale);
return (blend_unscaled * scale) >> 24;
}
// Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
const uint8_t src_a = (src >> 24) & 0xff;
if (src_a == 0) {
return dst;
} else {
const uint8_t dst_a = (dst >> 24) & 0xff;
// This is the approximate integer arithmetic for the actual formula:
// dst_factor_a = (dst_a * (255 - src_a)) / 255.
const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
const uint8_t blend_a = src_a + dst_factor_a;
const uint32_t scale = (1UL << 24) / blend_a;
const uint8_t blend_r =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
const uint8_t blend_g =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
const uint8_t blend_b =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
assert(src_a + dst_factor_a < 256);
return (blend_r << 0) |
(blend_g << 8) |
(blend_b << 16) |
((uint32_t)blend_a << 24);
}
}
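As a numeric illustration of the integer approximation above, the following standalone sketch (BlendChannelExact() is a hypothetical helper written for this example, not part of libwebp) blends a half-opaque channel over a fully opaque one and checks the fixed-point result against the exact non-premultiplied "over" formula:
/* Standalone sketch: numeric check of the integer approximation above.
 * BlendChannelExact() is a hypothetical helper, not part of libwebp. */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
static int BlendChannelExact(int src_c, int src_a, int dst_c, int dst_a) {
  /* Non-premultiplied "over": out = (src*sa + dst*da*(1-sa)) / out_a. */
  const double sa = src_a / 255.;
  const double da = (dst_a / 255.) * (1. - sa);
  return (int)((src_c * sa + dst_c * da) / (sa + da) + .5);
}
int main(void) {
  /* Half-opaque src channel over a fully opaque dst channel. */
  const int src_c = 100, src_a = 128, dst_c = 250, dst_a = 255;
  const uint32_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;   /* 127 */
  const uint32_t blend_a = src_a + dst_factor_a;                /* 255 */
  const uint32_t scale = (1u << 24) / blend_a;
  const uint32_t blend_unscaled = src_c * src_a + dst_c * dst_factor_a;
  const int approx = (int)((blend_unscaled * scale) >> 24);     /* 174 */
  assert(abs(approx - BlendChannelExact(src_c, src_a, dst_c, dst_a)) <= 1);
  return 0;
}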
// Blend 'num_pixels' in 'src' over 'dst' assuming they are NOT pre-multiplied
// by alpha.
static void BlendPixelRowNonPremult(uint32_t* const src,
const uint32_t* const dst, int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
if (src_alpha != 0xff) {
src[i] = BlendPixelNonPremult(src[i], dst[i]);
}
}
}
// Individually multiply each channel in 'pix' by 'scale'.
static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
uint32_t mask = 0x00FF00FF;
uint32_t rb = ((pix & mask) * scale) >> 8;
uint32_t ag = ((pix >> 8) & mask) * scale;
return (rb & mask) | (ag & ~mask);
}
// Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
const uint8_t src_a = (src >> 24) & 0xff;
return src + ChannelwiseMultiply(dst, 256 - src_a);
}
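ChannelwiseMultiply() above scales all four 8-bit lanes with just two 32-bit multiplications by splitting the pixel into its R/B and A/G byte pairs, and BlendPixelPremult() then reduces premultiplied "over" to src + dst * (256 - src_a) / 256. The following standalone sketch compares the packed trick against a straightforward per-lane reference (ChannelwiseMultiplyRef() is a made-up name for illustration):
/* Standalone sketch: the packed multiply vs. a per-lane reference.
 * ChannelwiseMultiplyRef() is a made-up name for illustration. */
#include <assert.h>
#include <stdint.h>
static uint32_t ChannelwiseMultiplyRef(uint32_t pix, uint32_t scale) {
  uint32_t out = 0;
  int shift;
  for (shift = 0; shift < 32; shift += 8) {
    const uint32_t lane = (((pix >> shift) & 0xff) * scale) >> 8;
    out |= lane << shift;                /* each lane stays within 8 bits */
  }
  return out;
}
int main(void) {
  const uint32_t pix = 0x80402010u;      /* arbitrary A/R/G/B test value  */
  const uint32_t mask = 0x00FF00FFu;
  uint32_t scale;
  for (scale = 0; scale <= 256; ++scale) {
    const uint32_t rb = ((pix & mask) * scale) >> 8;          /* R and B  */
    const uint32_t ag = ((pix >> 8) & mask) * scale;          /* A and G  */
    const uint32_t fast = (rb & mask) | (ag & ~mask);
    assert(fast == ChannelwiseMultiplyRef(pix, scale));
  }
  return 0;
}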
// Blend 'num_pixels' in 'src' over 'dst' assuming they are pre-multiplied by
// alpha.
static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
if (src_alpha != 0xff) {
src[i] = BlendPixelPremult(src[i], dst[i]);
}
}
}
// Returns two ranges (<left, width> pairs) at row 'canvas_y', that belong to
// 'src' but not 'dst'. A point range is empty if the corresponding width is 0.
static void FindBlendRangeAtRow(const WebPIterator* const src,
const WebPIterator* const dst, int canvas_y,
int* const left1, int* const width1,
int* const left2, int* const width2) {
const int src_max_x = src->x_offset + src->width;
const int dst_max_x = dst->x_offset + dst->width;
const int dst_max_y = dst->y_offset + dst->height;
assert(canvas_y >= src->y_offset && canvas_y < (src->y_offset + src->height));
*left1 = -1;
*width1 = 0;
*left2 = -1;
*width2 = 0;
if (canvas_y < dst->y_offset || canvas_y >= dst_max_y ||
src->x_offset >= dst_max_x || src_max_x <= dst->x_offset) {
*left1 = src->x_offset;
*width1 = src->width;
return;
}
if (src->x_offset < dst->x_offset) {
*left1 = src->x_offset;
*width1 = dst->x_offset - src->x_offset;
}
if (src_max_x > dst_max_x) {
*left2 = dst_max_x;
*width2 = src_max_x - dst_max_x;
}
}
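As a concrete example of the two ranges described above: if the current frame covers x in [10, 50) on a row where the previous frame covers [30, 70), only [10, 30) needs blending against the disposed canvas and the second range stays empty. A standalone sketch of that case with plain ints (no WebPIterator, and omitting the vertical-overlap early-out that the real function performs first):
/* Standalone sketch: the worked example above with plain ints. */
#include <assert.h>
int main(void) {
  /* Current frame covers x in [10, 50); previous frame covers [30, 70). */
  const int src_x = 10, src_w = 40, dst_x = 30, dst_w = 40;
  int left1 = -1, width1 = 0, left2 = -1, width2 = 0;
  if (src_x < dst_x) {                         /* part of src left of dst  */
    left1 = src_x;
    width1 = dst_x - src_x;
  }
  if (src_x + src_w > dst_x + dst_w) {         /* part of src right of dst */
    left2 = dst_x + dst_w;
    width2 = (src_x + src_w) - (dst_x + dst_w);
  }
  assert(left1 == 10 && width1 == 20);         /* [10, 30) needs blending  */
  assert(width2 == 0);                         /* nothing right of dst     */
  (void)left2;
  return 0;
}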
int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
uint8_t** buf_ptr, int* timestamp_ptr) {
WebPIterator iter;
uint32_t width;
uint32_t height;
int is_key_frame;
int timestamp;
BlendRowFunc blend_row;
if (dec == NULL || buf_ptr == NULL || timestamp_ptr == NULL) return 0;
if (!WebPAnimDecoderHasMoreFrames(dec)) return 0;
width = dec->info_.canvas_width;
height = dec->info_.canvas_height;
blend_row = dec->blend_func_;
// Get compressed frame.
if (!WebPDemuxGetFrame(dec->demux_, dec->next_frame_, &iter)) {
return 0;
}
timestamp = dec->prev_frame_timestamp_ + iter.duration;
// Initialize.
is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
dec->prev_frame_was_keyframe_, width, height);
if (is_key_frame) {
ZeroFillCanvas(dec->curr_frame_, width, height);
} else {
CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_, width, height);
}
// Decode.
{
const uint8_t* in = iter.fragment.bytes;
const size_t in_size = iter.fragment.size;
const size_t out_offset =
(iter.y_offset * width + iter.x_offset) * NUM_CHANNELS;
WebPDecoderConfig* const config = &dec->config_;
WebPRGBABuffer* const buf = &config->output.u.RGBA;
buf->stride = NUM_CHANNELS * width;
buf->size = buf->stride * iter.height;
buf->rgba = dec->curr_frame_ + out_offset;
if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) {
goto Error;
}
}
// During the decoding of the current frame, we may have set some pixels to be
// transparent (i.e. alpha < 255). However, the value of each of these pixels
// should have been determined by blending it against the value of that pixel
// in the previous frame if the blending method is WEBP_MUX_BLEND.
if (iter.frame_num > 1 && iter.blend_method == WEBP_MUX_BLEND &&
!is_key_frame) {
if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_NONE) {
int y;
// Blend transparent pixels with pixels in previous canvas.
for (y = 0; y < iter.height; ++y) {
const size_t offset =
(iter.y_offset + y) * width + iter.x_offset;
blend_row((uint32_t*)dec->curr_frame_ + offset,
(uint32_t*)dec->prev_frame_disposed_ + offset, iter.width);
}
} else {
int y;
assert(dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
// We need to blend a transparent pixel with its value just after
// initialization. That is, blend it with:
// * Fully transparent pixel if it belongs to prevRect <-- No-op.
// * The pixel in the previous canvas otherwise <-- Need alpha-blending.
for (y = 0; y < iter.height; ++y) {
const int canvas_y = iter.y_offset + y;
int left1, width1, left2, width2;
FindBlendRangeAtRow(&iter, &dec->prev_iter_, canvas_y, &left1, &width1,
&left2, &width2);
if (width1 > 0) {
const size_t offset1 = canvas_y * width + left1;
blend_row((uint32_t*)dec->curr_frame_ + offset1,
(uint32_t*)dec->prev_frame_disposed_ + offset1, width1);
}
if (width2 > 0) {
const size_t offset2 = canvas_y * width + left2;
blend_row((uint32_t*)dec->curr_frame_ + offset2,
(uint32_t*)dec->prev_frame_disposed_ + offset2, width2);
}
}
}
}
// Update info of the previous frame and dispose it for the next iteration.
dec->prev_frame_timestamp_ = timestamp;
dec->prev_iter_ = iter;
dec->prev_frame_was_keyframe_ = is_key_frame;
CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height);
if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
ZeroFillFrameRect(dec->prev_frame_disposed_, width * NUM_CHANNELS,
dec->prev_iter_.x_offset, dec->prev_iter_.y_offset,
dec->prev_iter_.width, dec->prev_iter_.height);
}
++dec->next_frame_;
// All OK, fill in the values.
*buf_ptr = dec->curr_frame_;
*timestamp_ptr = timestamp;
return 1;
Error:
WebPDemuxReleaseIterator(&iter);
return 0;
}
int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
if (dec == NULL) return 0;
return (dec->next_frame_ <= (int)dec->info_.frame_count);
}
void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
if (dec != NULL) {
dec->prev_frame_timestamp_ = 0;
memset(&dec->prev_iter_, 0, sizeof(dec->prev_iter_));
dec->prev_frame_was_keyframe_ = 0;
dec->next_frame_ = 1;
}
}
const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
if (dec == NULL) return NULL;
return dec->demux_;
}
void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
if (dec != NULL) {
WebPDemuxDelete(dec->demux_);
WebPSafeFree(dec->curr_frame_);
WebPSafeFree(dec->prev_frame_disposed_);
WebPSafeFree(dec);
}
}
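The code above implements the animation decoding API declared in webp/demux.h; a minimal caller loop might look like the following sketch (webp_bytes/webp_size are assumed to already hold a complete animated WebP file read into memory):
/* Standalone caller sketch; assumes webp_bytes/webp_size hold a complete
 * animated WebP file already read into memory. Minimal error handling. */
#include <stdint.h>
#include <stdio.h>
#include <webp/demux.h>
void DecodeAllFrames(const uint8_t* webp_bytes, size_t webp_size) {
  WebPData data = { webp_bytes, webp_size };
  WebPAnimDecoderOptions opts;
  WebPAnimDecoder* dec;
  WebPAnimInfo info;
  WebPAnimDecoderOptionsInit(&opts);
  opts.color_mode = MODE_rgbA;               /* premultiplied RGBA output */
  dec = WebPAnimDecoderNew(&data, &opts);
  if (dec == NULL) return;
  if (WebPAnimDecoderGetInfo(dec, &info)) {
    printf("%ux%u canvas, %u frame(s)\n", (unsigned)info.canvas_width,
           (unsigned)info.canvas_height, (unsigned)info.frame_count);
  }
  while (WebPAnimDecoderHasMoreFrames(dec)) {
    uint8_t* frame_rgba;
    int timestamp_ms;
    if (!WebPAnimDecoderGetNext(dec, &frame_rgba, &timestamp_ms)) break;
    /* frame_rgba points at the decoder-owned canvas; copy it out if it
     * must outlive the next GetNext()/Delete() call. */
    printf("frame ready at %d ms\n", timestamp_ms);
  }
  WebPAnimDecoderDelete(dec);
}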

View File

@ -11,7 +11,7 @@
//
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#include "config.h"
#endif
#include <assert.h>
@ -23,9 +23,13 @@
#include "../webp/demux.h"
#include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define DMUX_MAJ_VERSION 0
#define DMUX_MIN_VERSION 3
#define DMUX_REV_VERSION 0
#define DMUX_MIN_VERSION 1
#define DMUX_REV_VERSION 1
typedef struct {
size_t start_; // start location of the data
@ -43,11 +47,10 @@ typedef struct {
typedef struct Frame {
int x_offset_, y_offset_;
int width_, height_;
int has_alpha_;
int duration_;
WebPMuxAnimDispose dispose_method_;
WebPMuxAnimBlend blend_method_;
int frame_num_;
int is_fragment_; // this is a frame fragment (and not a full frame).
int frame_num_; // the referent frame number for use in assembling fragments.
int complete_; // img_components_ contains a full image.
ChunkData img_components_[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next_;
@ -70,7 +73,6 @@ struct WebPDemuxer {
Frame* frames_;
Frame** frames_tail_;
Chunk* chunks_; // non-image chunks
Chunk** chunks_tail_;
};
typedef enum {
@ -175,9 +177,10 @@ static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
// Secondary chunk parsing
static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
*dmux->chunks_tail_ = chunk;
Chunk** c = &dmux->chunks_;
while (*c != NULL) c = &(*c)->next_;
*c = chunk;
chunk->next_ = NULL;
dmux->chunks_tail_ = &chunk->next_;
}
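The hunk above juxtaposes two variants of AddChunk(): one keeps a chunks_tail_ pointer so appending is O(1), the other walks the list from chunks_ on every call. The tail-pointer idiom in isolation, as a generic standalone sketch (not libwebp code):
/* Tail-pointer append: 'tail_' always points at the 'next_' slot that should
 * receive the next node, so appending is O(1) and needs no special case for
 * an empty list. Generic sketch, not libwebp code. */
typedef struct Node { struct Node* next_; } Node;
typedef struct {
  Node* head_;
  Node** tail_;                 /* initialized to &head_                */
} List;
static void Append(List* const list, Node* const node) {
  node->next_ = NULL;
  *list->tail_ = node;          /* link into the list (or set head_)    */
  list->tail_ = &node->next_;   /* next append writes into this node    */
}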
// Add a frame to the end of the list, ensuring the last frame is complete.
@ -192,27 +195,19 @@ static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
return 1;
}
static void SetFrameInfo(size_t start_offset, size_t size,
int frame_num, int complete,
const WebPBitstreamFeatures* const features,
Frame* const frame) {
frame->img_components_[0].offset_ = start_offset;
frame->img_components_[0].size_ = size;
frame->width_ = features->width;
frame->height_ = features->height;
frame->has_alpha_ |= features->has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = complete;
}
// Store image bearing chunks to 'frame'.
// If 'has_vp8l_alpha' is not NULL, it will be set to true if the frame is a
// lossless image with alpha.
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame) {
MemBuffer* const mem, Frame* const frame,
int* const has_vp8l_alpha) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = 0; // Default.
if (done) return PARSE_NEED_MORE_DATA;
do {
@ -234,7 +229,6 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
++alpha_chunks;
frame->img_components_[1].offset_ = chunk_start_offset;
frame->img_components_[1].size_ = chunk_size;
frame->has_alpha_ = 1;
frame->frame_num_ = frame_num;
Skip(mem, payload_available);
} else {
@ -260,8 +254,13 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
return PARSE_ERROR;
}
++image_chunks;
SetFrameInfo(chunk_start_offset, chunk_size, frame_num,
status == PARSE_OK, &features, frame);
frame->img_components_[0].offset_ = chunk_start_offset;
frame->img_components_[0].size_ = chunk_size;
frame->width_ = features.width;
frame->height_ = features.height;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = features.has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = (status == PARSE_OK);
Skip(mem, payload_available);
} else {
goto Done;
@ -296,7 +295,7 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
if (actual_size < min_size) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
*frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(**frame));
*frame = (Frame*)calloc(1, sizeof(**frame));
return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
}
@ -304,10 +303,9 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
int bits;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
@ -319,19 +317,17 @@ static ParseStatus ParseAnimationFrame(
frame->width_ = 1 + ReadLE24s(mem);
frame->height_ = 1 + ReadLE24s(mem);
frame->duration_ = ReadLE24s(mem);
bits = ReadByte(mem);
frame->dispose_method_ =
(bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
frame->dispose_method_ = (WebPMuxAnimDispose)(ReadByte(mem) & 1);
if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
WebPSafeFree(frame);
free(frame);
return PARSE_ERROR;
}
// Store a frame only if the animation flag is set and some data for this
// frame is available.
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame,
NULL);
if (status != PARSE_ERROR && has_frames && frame->frame_num_ > 0) {
added_frame = AddFrame(dmux, frame);
if (added_frame) {
++dmux->num_frames_;
@ -340,17 +336,53 @@ static ParseStatus ParseAnimationFrame(
}
}
if (!added_frame) WebPSafeFree(frame);
if (!added_frame) free(frame);
return status;
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
// Parse a 'FRGM' chunk and any image bearing chunks that immediately follow.
// 'fragment_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseFragment(WebPDemuxer* const dmux,
uint32_t fragment_chunk_size) {
const int frame_num = 1; // All fragments belong to the 1st (and only) frame.
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const uint32_t frgm_payload_size = fragment_chunk_size - FRGM_CHUNK_SIZE;
int added_fragment = 0;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, FRGM_CHUNK_SIZE, fragment_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->is_fragment_ = 1;
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
// Store a fragment only if the fragments flag is set and some data for this
// fragment is available.
status = StoreFrame(frame_num, frgm_payload_size, mem, frame, NULL);
if (status != PARSE_ERROR && has_fragments && frame->frame_num_ > 0) {
added_fragment = AddFrame(dmux, frame);
if (!added_fragment) {
status = PARSE_ERROR;
} else {
dmux->num_frames_ = 1;
}
}
if (!added_fragment) free(frame);
return status;
}
#endif // WEBP_EXPERIMENTAL_FEATURES
// General chunk storage, starting with the header at 'start_offset', allowing
// the user to request the payload via a fourcc string. 'size' includes the
// header and the unpadded payload size.
// Returns true on success, false otherwise.
static int StoreChunk(WebPDemuxer* const dmux,
size_t start_offset, uint32_t size) {
Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
Chunk* const chunk = (Chunk*)calloc(1, sizeof(*chunk));
if (chunk == NULL) return 0;
chunk->data_.offset_ = start_offset;
@ -362,20 +394,20 @@ static int StoreChunk(WebPDemuxer* const dmux,
// -----------------------------------------------------------------------------
// Primary chunk parsing
static ParseStatus ReadHeader(MemBuffer* const mem) {
static int ReadHeader(MemBuffer* const mem) {
const size_t min_size = RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE;
uint32_t riff_size;
// Basic file level validation.
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
if (MemDataSize(mem) < min_size) return 0;
if (memcmp(GetBuffer(mem), "RIFF", CHUNK_SIZE_BYTES) ||
memcmp(GetBuffer(mem) + CHUNK_HEADER_SIZE, "WEBP", CHUNK_SIZE_BYTES)) {
return PARSE_ERROR;
return 0;
}
riff_size = GetLE32(GetBuffer(mem) + TAG_SIZE);
if (riff_size < CHUNK_HEADER_SIZE) return PARSE_ERROR;
if (riff_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (riff_size < CHUNK_HEADER_SIZE) return 0;
if (riff_size > MAX_CHUNK_PAYLOAD) return 0;
// There's no point in reading past the end of the RIFF chunk
mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
@ -384,7 +416,7 @@ static ParseStatus ReadHeader(MemBuffer* const mem) {
}
Skip(mem, RIFF_HEADER_SIZE);
return PARSE_OK;
return 1;
}
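ReadHeader() above only inspects the fixed 12-byte RIFF container prologue. Spelled out as a standalone sketch (kHeader is a made-up example buffer, not real file data):
/* The 12-byte RIFF prologue being validated above:
 *   offset 0..3   'R' 'I' 'F' 'F'
 *   offset 4..7   little-endian payload size (file size - 8)
 *   offset 8..11  'W' 'E' 'B' 'P'
 * Standalone sketch with a made-up buffer. */
#include <assert.h>
#include <stdint.h>
#include <string.h>
int main(void) {
  static const uint8_t kHeader[12] = {
    'R', 'I', 'F', 'F',  0x1a, 0x00, 0x00, 0x00,  'W', 'E', 'B', 'P'
  };
  const uint32_t riff_size = (uint32_t)kHeader[4] |
                             ((uint32_t)kHeader[5] << 8) |
                             ((uint32_t)kHeader[6] << 16) |
                             ((uint32_t)kHeader[7] << 24);
  assert(!memcmp(kHeader, "RIFF", 4));
  assert(!memcmp(kHeader + 8, "WEBP", 4));
  assert(riff_size >= 8 /* CHUNK_HEADER_SIZE */);
  return 0;
}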
static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
@ -392,25 +424,25 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status;
int image_added = 0;
int has_vp8l_alpha = 0; // Frame contains a lossless image with alpha.
if (dmux->frames_ != NULL) return PARSE_ERROR;
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
frame = (Frame*)calloc(1, sizeof(*frame));
if (frame == NULL) return PARSE_ERROR;
// For the single image case we allow parsing of a partial frame, but we need
// at least CHUNK_HEADER_SIZE for parsing.
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame,
&has_vp8l_alpha);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.
if (!has_alpha && frame->img_components_[1].size_ > 0) {
frame->img_components_[1].offset_ = 0;
frame->img_components_[1].size_ = 0;
frame->has_alpha_ = 0;
}
// Use the frame width/height as the canvas values for non-vp8x files.
@ -419,26 +451,47 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
}
if (!AddFrame(dmux, frame)) {
status = PARSE_ERROR; // last frame was left incomplete
} else {
image_added = 1;
dmux->num_frames_ = 1;
dmux->feature_flags_ |= has_vp8l_alpha ? ALPHA_FLAG : 0;
}
AddFrame(dmux, frame);
dmux->num_frames_ = 1;
} else {
free(frame);
}
if (!image_added) WebPSafeFree(frame);
return status;
}
static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
int anim_chunks = 0;
uint32_t vp8x_size;
ParseStatus status = PARSE_OK;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
dmux->is_ext_format_ = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
vp8x_size += vp8x_size & 1;
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
Skip(mem, vp8x_size - VP8X_CHUNK_SIZE); // skip any trailing data.
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
do {
int store_chunk = 1;
const size_t chunk_start_offset = mem->start_;
@ -457,7 +510,7 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'): {
// check that this isn't an animation (all frames should be in an ANMF).
if (anim_chunks > 0 || is_animation) return PARSE_ERROR;
if (anim_chunks > 0) return PARSE_ERROR;
Rewind(mem, CHUNK_HEADER_SIZE);
status = ParseSingleImage(dmux);
@ -484,18 +537,24 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
status = ParseAnimationFrame(dmux, chunk_size_padded);
break;
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
case MKFOURCC('F', 'R', 'G', 'M'): {
status = ParseFragment(dmux, chunk_size_padded);
break;
}
#endif
case MKFOURCC('I', 'C', 'C', 'P'): {
store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
goto Skip;
}
case MKFOURCC('X', 'M', 'P', ' '): {
store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
goto Skip;
}
Skip:
default: {
if (chunk_size_padded <= MemDataSize(mem)) {
@ -524,37 +583,6 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
return status;
}
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
uint32_t vp8x_size;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
dmux->is_ext_format_ = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
vp8x_size += vp8x_size & 1;
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
Skip(mem, vp8x_size - VP8X_CHUNK_SIZE); // skip any trailing data.
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
return ParseVP8XChunks(dmux);
}
// -----------------------------------------------------------------------------
// Format validation
@ -569,48 +597,30 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
return 1;
}
// If 'exact' is true, check that the image resolution matches the canvas.
// If 'exact' is false, check that the x/y offsets do not exceed the canvas.
static int CheckFrameBounds(const Frame* const frame, int exact,
int canvas_width, int canvas_height) {
if (exact) {
if (frame->x_offset_ != 0 || frame->y_offset_ != 0) {
return 0;
}
if (frame->width_ != canvas_width || frame->height_ != canvas_height) {
return 0;
}
} else {
if (frame->x_offset_ < 0 || frame->y_offset_ < 0) return 0;
if (frame->width_ + frame->x_offset_ > canvas_width) return 0;
if (frame->height_ + frame->y_offset_ > canvas_height) return 0;
}
return 1;
}
static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const int is_fragmented = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const Frame* f = dmux->frames_;
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const Frame* f;
if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
if (dmux->loop_count_ < 0) return 0;
if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
if (is_fragmented) return 0;
while (f != NULL) {
for (f = dmux->frames_; f != NULL; f = f->next_) {
const int cur_frame_set = f->frame_num_;
int frame_count = 0;
int frame_count = 0, fragment_count = 0;
// Check frame properties.
// Check frame properties and, if the image is composed of fragments, that
// every piece is indeed a fragment.
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
const ChunkData* const image = f->img_components_;
const ChunkData* const alpha = f->img_components_ + 1;
if (!is_animation && f->frame_num_ > 1) return 0;
if (!has_fragments && f->is_fragment_) return 0;
if (!has_frames && f->frame_num_ > 1) return 0;
if (f->x_offset_ < 0 || f->y_offset_ < 0) return 0;
if (f->complete_) {
if (alpha->size_ == 0 && image->size_ == 0) return 0;
// Ensure alpha precedes image bitstream.
@ -632,14 +642,12 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
if (f->next_ != NULL) return 0;
}
if (f->width_ > 0 && f->height_ > 0 &&
!CheckFrameBounds(f, !is_animation,
dmux->canvas_width_, dmux->canvas_height_)) {
return 0;
}
fragment_count += f->is_fragment_;
++frame_count;
}
if (!has_fragments && frame_count > 1) return 0;
if (fragment_count > 0 && frame_count != fragment_count) return 0;
if (f == NULL) break;
}
return 1;
}
@ -654,45 +662,9 @@ static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
dmux->canvas_width_ = -1;
dmux->canvas_height_ = -1;
dmux->frames_tail_ = &dmux->frames_;
dmux->chunks_tail_ = &dmux->chunks_;
dmux->mem_ = *mem;
}
static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
WebPDemuxer** demuxer) {
WebPBitstreamFeatures features;
const VP8StatusCode status =
WebPGetFeatures(mem->buf_, mem->buf_size_, &features);
*demuxer = NULL;
if (status != VP8_STATUS_OK) {
return (status == VP8_STATUS_NOT_ENOUGH_DATA) ? PARSE_NEED_MORE_DATA
: PARSE_ERROR;
}
{
WebPDemuxer* const dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
Frame* const frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
if (dmux == NULL || frame == NULL) goto Error;
InitDemux(dmux, mem);
SetFrameInfo(0, mem->buf_size_, 1 /*frame_num*/, 1 /*complete*/, &features,
frame);
if (!AddFrame(dmux, frame)) goto Error;
dmux->state_ = WEBP_DEMUX_DONE;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
dmux->num_frames_ = 1;
assert(IsValidSimpleFormat(dmux));
*demuxer = dmux;
return PARSE_OK;
Error:
WebPSafeFree(dmux);
WebPSafeFree(frame);
return PARSE_ERROR;
}
}
WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
WebPDemuxState* state, int version) {
const ChunkParser* parser;
@ -701,49 +673,29 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
MemBuffer mem;
WebPDemuxer* dmux;
if (state != NULL) *state = WEBP_DEMUX_PARSE_ERROR;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
status = ReadHeader(&mem);
if (status != PARSE_OK) {
// If parsing of the webp file header fails attempt to handle a raw
// VP8/VP8L frame. Note 'allow_partial' is ignored in this case.
if (status == PARSE_ERROR) {
status = CreateRawImageDemuxer(&mem, &dmux);
if (status == PARSE_OK) {
if (state != NULL) *state = WEBP_DEMUX_DONE;
return dmux;
}
}
if (state != NULL) {
*state = (status == PARSE_NEED_MORE_DATA) ? WEBP_DEMUX_PARSING_HEADER
: WEBP_DEMUX_PARSE_ERROR;
}
return NULL;
}
if (!ReadHeader(&mem)) return NULL;
partial = (mem.buf_size_ < mem.riff_end_);
if (!allow_partial && partial) return NULL;
dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
dmux = (WebPDemuxer*)calloc(1, sizeof(*dmux));
if (dmux == NULL) return NULL;
InitDemux(dmux, &mem);
status = PARSE_ERROR;
for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
status = parser->parse(dmux);
if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
break;
}
}
if (state != NULL) *state = dmux->state_;
if (state) *state = dmux->state_;
if (status == PARSE_ERROR) {
WebPDemuxDelete(dmux);
@ -760,14 +712,14 @@ void WebPDemuxDelete(WebPDemuxer* dmux) {
for (f = dmux->frames_; f != NULL;) {
Frame* const cur_frame = f;
f = f->next_;
WebPSafeFree(cur_frame);
free(cur_frame);
}
for (c = dmux->chunks_; c != NULL;) {
Chunk* const cur_chunk = c;
c = c->next_;
WebPSafeFree(cur_chunk);
free(cur_chunk);
}
WebPSafeFree(dmux);
free(dmux);
}
// -----------------------------------------------------------------------------
@ -789,6 +741,8 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
// -----------------------------------------------------------------------------
// Frame iteration
// Find the first 'frame_num' frame. There may be multiple such frames in a
// fragmented frame.
static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
const Frame* f;
for (f = dmux->frames_; f != NULL; f = f->next_) {
@ -797,6 +751,21 @@ static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
return f;
}
// Returns fragment 'fragment_num' and the total count.
static const Frame* GetFragment(
const Frame* const frame_set, int fragment_num, int* const count) {
const int this_frame = frame_set->frame_num_;
const Frame* f = frame_set;
const Frame* fragment = NULL;
int total;
for (total = 0; f != NULL && f->frame_num_ == this_frame; f = f->next_) {
if (++total == fragment_num) fragment = f;
}
*count = total;
return fragment;
}
static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
const Frame* const frame,
size_t* const data_size) {
@ -823,27 +792,32 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
// Create a whole 'frame' from VP8 (+ alpha) or lossless.
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const frame,
WebPIterator* const iter) {
const Frame* const first_frame,
int fragment_num, WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
int num_fragments;
size_t payload_size = 0;
const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
const Frame* const fragment =
GetFragment(first_frame, fragment_num, &num_fragments);
const uint8_t* const payload =
GetFramePayload(mem_buf, fragment, &payload_size);
if (payload == NULL) return 0;
assert(frame != NULL);
assert(first_frame != NULL);
iter->frame_num = frame->frame_num_;
iter->frame_num = first_frame->frame_num_;
iter->num_frames = dmux->num_frames_;
iter->x_offset = frame->x_offset_;
iter->y_offset = frame->y_offset_;
iter->width = frame->width_;
iter->height = frame->height_;
iter->has_alpha = frame->has_alpha_;
iter->duration = frame->duration_;
iter->dispose_method = frame->dispose_method_;
iter->blend_method = frame->blend_method_;
iter->complete = frame->complete_;
iter->fragment_num = fragment_num;
iter->num_fragments = num_fragments;
iter->x_offset = fragment->x_offset_;
iter->y_offset = fragment->y_offset_;
iter->width = fragment->width_;
iter->height = fragment->height_;
iter->duration = fragment->duration_;
iter->dispose_method = fragment->dispose_method_;
iter->complete = fragment->complete_;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
// TODO(jzern): adjust offsets for 'FRGM's embedded in 'ANMF's
return 1;
}
@ -857,7 +831,7 @@ static int SetFrame(int frame_num, WebPIterator* const iter) {
frame = GetFrame(dmux, frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, iter);
return SynthesizeFrame(dmux, frame, 1, iter);
}
int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
@ -879,6 +853,17 @@ int WebPDemuxPrevFrame(WebPIterator* iter) {
return SetFrame(iter->frame_num - 1, iter);
}
int WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num) {
if (iter != NULL && iter->private_ != NULL && fragment_num > 0) {
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
const Frame* const frame = GetFrame(dmux, iter->frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, fragment_num, iter);
}
return 0;
}
void WebPDemuxReleaseIterator(WebPIterator* iter) {
(void)iter;
}
@ -964,3 +949,6 @@ void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
(void)iter;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,6 +1,5 @@
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
AM_CPPFLAGS = -I$(top_srcdir)/src
noinst_LTLIBRARIES = libwebpdsp.la
if BUILD_LIBWEBPDECODER
noinst_LTLIBRARIES += libwebpdspdecode.la
@ -10,87 +9,23 @@ common_HEADERS = ../webp/types.h
commondir = $(includedir)/webp
COMMON_SOURCES =
COMMON_SOURCES += alpha_processing.c
COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
COMMON_SOURCES += cpu.c
COMMON_SOURCES += dec.c
COMMON_SOURCES += dec_clip_tables.c
COMMON_SOURCES += dec_mips32.c
COMMON_SOURCES += dec_mips_dsp_r2.c
COMMON_SOURCES += dec_neon.c
COMMON_SOURCES += dec_sse2.c
COMMON_SOURCES += dsp.h
COMMON_SOURCES += filters.c
COMMON_SOURCES += filters_mips_dsp_r2.c
COMMON_SOURCES += lossless.c
COMMON_SOURCES += lossless.h
COMMON_SOURCES += lossless_mips_dsp_r2.c
COMMON_SOURCES += lossless_neon.c
COMMON_SOURCES += mips_macro.h
COMMON_SOURCES += neon.h
COMMON_SOURCES += rescaler.c
COMMON_SOURCES += rescaler_mips32.c
COMMON_SOURCES += rescaler_mips_dsp_r2.c
COMMON_SOURCES += rescaler_neon.c
COMMON_SOURCES += upsampling.c
COMMON_SOURCES += upsampling_mips_dsp_r2.c
COMMON_SOURCES += upsampling_neon.c
COMMON_SOURCES += upsampling_sse2.c
COMMON_SOURCES += yuv.c
COMMON_SOURCES += yuv.h
COMMON_SOURCES += yuv_mips32.c
COMMON_SOURCES += yuv_mips_dsp_r2.c
ENC_SOURCES =
ENC_SOURCES += argb.c
ENC_SOURCES += argb_mips_dsp_r2.c
ENC_SOURCES += cost.c
ENC_SOURCES += cost_mips32.c
ENC_SOURCES += cost_mips_dsp_r2.c
ENC_SOURCES += enc.c
ENC_SOURCES += enc_mips32.c
ENC_SOURCES += enc_mips_dsp_r2.c
ENC_SOURCES += enc_neon.c
ENC_SOURCES += lossless_enc.c
ENC_SOURCES += lossless_enc_mips32.c
ENC_SOURCES += lossless_enc_mips_dsp_r2.c
ENC_SOURCES += lossless_enc_neon.c
libwebpdsp_avx2_la_SOURCES =
libwebpdsp_avx2_la_SOURCES += enc_avx2.c
libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
libwebpdspdecode_sse41_la_SOURCES =
libwebpdspdecode_sse41_la_SOURCES += alpha_processing_sse41.c
libwebpdspdecode_sse41_la_SOURCES += dec_sse41.c
libwebpdspdecode_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdspdecode_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
libwebpdspdecode_sse2_la_SOURCES =
libwebpdspdecode_sse2_la_SOURCES += alpha_processing_sse2.c
libwebpdspdecode_sse2_la_SOURCES += dec_sse2.c
libwebpdspdecode_sse2_la_SOURCES += filters_sse2.c
libwebpdspdecode_sse2_la_SOURCES += lossless_sse2.c
libwebpdspdecode_sse2_la_SOURCES += rescaler_sse2.c
libwebpdspdecode_sse2_la_SOURCES += upsampling_sse2.c
libwebpdspdecode_sse2_la_SOURCES += yuv_sse2.c
libwebpdspdecode_sse2_la_CPPFLAGS = $(libwebpdsp_sse2_la_CPPFLAGS)
libwebpdspdecode_sse2_la_CFLAGS = $(libwebpdsp_sse2_la_CFLAGS)
libwebpdsp_sse2_la_SOURCES =
libwebpdsp_sse2_la_SOURCES += argb_sse2.c
libwebpdsp_sse2_la_SOURCES += cost_sse2.c
libwebpdsp_sse2_la_SOURCES += enc_sse2.c
libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
libwebpdsp_sse2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
libwebpdsp_sse2_la_LIBADD = libwebpdspdecode_sse2.la
libwebpdsp_sse41_la_SOURCES =
libwebpdsp_sse41_la_SOURCES += enc_sse41.c
libwebpdsp_sse41_la_SOURCES += lossless_enc_sse41.c
libwebpdsp_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
libwebpdsp_sse41_la_LIBADD = libwebpdspdecode_sse41.la
ENC_SOURCES += enc_sse2.c
libwebpdsp_la_SOURCES = $(COMMON_SOURCES) $(ENC_SOURCES)
@ -98,20 +33,12 @@ noinst_HEADERS =
noinst_HEADERS += ../dec/decode_vp8.h
noinst_HEADERS += ../webp/decode.h
libwebpdsp_la_CPPFLAGS =
libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
libwebpdsp_la_LDFLAGS = -lm
libwebpdsp_la_LIBADD =
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
libwebpdsp_la_CPPFLAGS = $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
if BUILD_LIBWEBPDECODER
libwebpdspdecode_la_SOURCES = $(COMMON_SOURCES)
libwebpdspdecode_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdspdecode_la_LDFLAGS = $(libwebpdsp_la_LDFLAGS)
libwebpdspdecode_la_LIBADD =
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
libwebpdspdecode_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
endif

View File

@ -1,383 +0,0 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include "./dsp.h"
// Tables can be faster on some platforms but incur some extra binary size (~2k).
// #define USE_TABLES_FOR_ALPHA_MULT
// -----------------------------------------------------------------------------
#define MFIX 24 // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)
#define KINV_255 ((1u << MFIX) / 255u)
static uint32_t Mult(uint8_t x, uint32_t mult) {
const uint32_t v = (x * mult + HALF) >> MFIX;
assert(v <= 255); // <- 24bit precision is enough to ensure that.
return v;
}
#ifdef USE_TABLES_FOR_ALPHA_MULT
static const uint32_t kMultTables[2][256] = {
{ // (255u << MFIX) / alpha
0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
{ // alpha * KINV_255
0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
};
static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return kMultTables[!inverse][a];
}
#else
static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return inverse ? (255u << MFIX) / a : a * KINV_255;
}
#endif // USE_TABLES_FOR_ALPHA_MULT
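Mult() and GetScale() above implement multiplication by alpha/255 (inverse == 0) and by 255/alpha (inverse == 1) in 24-bit fixed point. A small standalone round-trip check that mirrors those helpers (not library code):
/* Standalone sketch: 24-bit fixed-point premultiply then unmultiply,
 * mirroring Mult()/GetScale() above. */
#include <assert.h>
#include <stdint.h>
#define MFIX 24
#define HALF ((1u << MFIX) >> 1)
#define KINV_255 ((1u << MFIX) / 255u)
static uint32_t Mult(uint8_t x, uint32_t mult) {
  return (x * mult + HALF) >> MFIX;
}
int main(void) {
  const uint32_t alpha = 64;
  const uint8_t value = 200;
  /* premultiply: ~ value * alpha / 255 */
  const uint8_t pm = (uint8_t)Mult(value, alpha * KINV_255);      /* 50  */
  /* unmultiply:  ~ pm * 255 / alpha */
  const uint8_t um = (uint8_t)Mult(pm, (255u << MFIX) / alpha);   /* 199 */
  assert(pm == 50 && um >= 198 && um <= 200);
  return 0;
}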
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
const uint32_t alpha = (argb >> 24) & 0xff;
const uint32_t scale = GetScale(alpha, inverse);
uint32_t out = argb & 0xff000000u;
out |= Mult(argb >> 0, scale) << 0;
out |= Mult(argb >> 8, scale) << 8;
out |= Mult(argb >> 16, scale) << 16;
ptr[x] = out;
}
}
}
}
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t a = alpha[x];
if (a != 255) {
if (a == 0) {
ptr[x] = 0;
} else {
const uint32_t scale = GetScale(a, inverse);
ptr[x] = Mult(ptr[x], scale);
}
}
}
}
#undef KINV_255
#undef HALF
#undef MFIX
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
//------------------------------------------------------------------------------
// Generic per-plane calls
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
int inverse) {
int n;
for (n = 0; n < num_rows; ++n) {
WebPMultARGBRow((uint32_t*)ptr, width, inverse);
ptr += stride;
}
}
void WebPMultRows(uint8_t* ptr, int stride,
const uint8_t* alpha, int alpha_stride,
int width, int num_rows, int inverse) {
int n;
for (n = 0; n < num_rows; ++n) {
WebPMultRow(ptr, alpha, width, inverse);
ptr += stride;
alpha += alpha_stride;
}
}
//------------------------------------------------------------------------------
// Premultiplied modes
// non dithered-modes
// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
// for all 8-bit x and a. For bit-wise equivalence to (int)(x * a / 255. + .5),
// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
#if 1 // (int)(x * a / 255.)
#define MULTIPLIER(a) ((a) * 32897U)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#else // (int)(x * a / 255. + .5)
#define MULTIPLIER(a) ((a) * 65793U)
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif
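The bit-exactness claim in the comment above is cheap to verify exhaustively; a standalone sketch checking every combination of 8-bit x and a:
/* Standalone sketch: exhaustive check of the bit-exactness claim above
 * (all 256 * 256 combinations of 8-bit x and a). */
#include <assert.h>
#include <stdint.h>
int main(void) {
  uint32_t x, a;
  for (a = 0; a < 256; ++a) {
    for (x = 0; x < 256; ++x) {
      const uint32_t truncating = (x * a * 32897u) >> 23;
      const uint32_t rounding = (x * a * 65793u + (1u << 23)) >> 24;
      assert(truncating == (uint32_t)(x * a / 255.));
      assert(rounding == (uint32_t)(x * a / 255. + .5));
    }
  }
  return 0;
}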
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
while (h-- > 0) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
int i;
for (i = 0; i < w; ++i) {
const uint32_t a = alpha[4 * i];
if (a != 0xff) {
const uint32_t mult = MULTIPLIER(a);
rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
}
}
rgba += stride;
}
}
#undef MULTIPLIER
#undef PREMULTIPLY
// rgbA4444
#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
return (x & 0xf0) | (x >> 4);
}
static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
return (x & 0x0f) | (x << 4);
}
static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
return (x * m) >> 16;
}
static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
int w, int h, int stride,
int rg_byte_pos /* 0 or 1 */) {
while (h-- > 0) {
int i;
for (i = 0; i < w; ++i) {
const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
const uint8_t a = ba & 0x0f;
const uint32_t mult = MULTIPLIER(a);
const uint8_t r = multiply(dither_hi(rg), mult);
const uint8_t g = multiply(dither_lo(rg), mult);
const uint8_t b = multiply(dither_hi(ba), mult);
rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
}
rgba4444 += stride;
}
}
#undef MULTIPLIER
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
int w, int h, int stride) {
#ifdef WEBP_SWAP_16BIT_CSP
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
#else
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
#endif
}
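The comments in the 4444 path above are terse; the idea is that each 4-bit channel is first expanded to 8 bits by replicating its nibble, then scaled by roughly a/15 using the 0x1111 ~= (1 << 16) / 15 approximation, and only the top nibble of the result is stored back. A small standalone sketch of that arithmetic, with hard-coded example values (illustrative only, not libwebp code):

#include <stdio.h>
int main(void) {
  const unsigned rg = 0xA5;                               // packed nibbles: r = 0xA, g = 0x5
  const unsigned r8 = (rg & 0xf0) | (rg >> 4);            // dither_hi: 0xAA
  const unsigned g8 = (rg & 0x0f) | ((rg << 4) & 0xff);   // dither_lo: 0x55
  const unsigned a = 0x0f;                                // full 4-bit alpha
  const unsigned mult = a * 0x1111u;                      // ~= (a << 16) / 15
  printf("%02X %02X -> %02X %02X\n", r8, g8, (r8 * mult) >> 16, (g8 * mult) >> 16);
  // Prints "AA 55 -> A9 54": slightly below the exact values because 15 * 0x1111
  // is 0xFFFF rather than 0x10000, but the top nibbles (all that the caller
  // stores back) are intact.
  return 0;
}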
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xff;
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_mask &= alpha_value;
}
alpha += alpha_stride;
dst += dst_stride;
}
return (alpha_mask != 0xff);
}
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
dst[i] = alpha[i] << 8; // leave A/R/B channels zero'd.
}
alpha += alpha_stride;
dst += dst_stride;
}
}
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
uint8_t alpha_mask = 0xff;
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const uint8_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_mask &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
return (alpha_mask == 0xff);
}
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
//------------------------------------------------------------------------------
// Init function
extern void WebPInitAlphaProcessingMIPSdspR2(void);
extern void WebPInitAlphaProcessingSSE2(void);
extern void WebPInitAlphaProcessingSSE41(void);
static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
(VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
WebPMultARGBRow = WebPMultARGBRowC;
WebPMultRow = WebPMultRowC;
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
WebPDispatchAlpha = DispatchAlpha;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
WebPExtractAlpha = ExtractAlpha;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
WebPInitAlphaProcessingSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
WebPInitAlphaProcessingSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
WebPInitAlphaProcessingMIPSdspR2();
}
#endif
}
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
}
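All of the files in this change follow the runtime-dispatch pattern seen in WebPInitAlphaProcessing() above: portable C implementations are assigned to function pointers, and an init function optionally overwrites them with SIMD variants after probing the CPU. A minimal standalone sketch of that pattern (the names AddC, AddFast, HaveFastCPU and InitAdd are invented for illustration and are not libwebp symbols):

#include <stdio.h>

static int AddC(int a, int b) { return a + b; }     // portable fallback
static int AddFast(int a, int b) { return a + b; }  // stand-in for a SIMD-accelerated version

static int (*Add)(int, int) = NULL;                 // dispatched entry point

static int HaveFastCPU(void) { return 1; }          // stand-in for a VP8GetCPUInfo-style probe

static void InitAdd(void) {
  if (Add != NULL) return;                          // already initialized
  Add = AddC;                                       // always start from the C version
  if (HaveFastCPU()) Add = AddFast;                 // then overwrite with a faster one
}

int main(void) {
  InitAdd();
  printf("%d\n", Add(2, 3));
  return 0;
}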

View File

@ -1,141 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xffffffff;
int i, j, temp0;
for (j = 0; j < height; ++j) {
uint8_t* pdst = dst;
const uint8_t* palpha = alpha;
for (i = 0; i < (width >> 2); ++i) {
int temp1, temp2, temp3;
__asm__ volatile (
"ulw %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 4 \n\t"
"addiu %[pdst], %[pdst], 16 \n\t"
"srl %[temp1], %[temp0], 8 \n\t"
"srl %[temp2], %[temp0], 16 \n\t"
"srl %[temp3], %[temp0], 24 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"sb %[temp0], -16(%[pdst]) \n\t"
"sb %[temp1], -12(%[pdst]) \n\t"
"sb %[temp2], -8(%[pdst]) \n\t"
"sb %[temp3], -4(%[pdst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
}
for (i = 0; i < (width & 3); ++i) {
__asm__ volatile (
"lbu %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 1 \n\t"
"sb %[temp0], 0(%[pdst]) \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"addiu %[pdst], %[pdst], 4 \n\t"
: [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
}
alpha += alpha_stride;
dst += dst_stride;
}
__asm__ volatile (
"ext %[temp0], %[alpha_mask], 0, 16 \n\t"
"srl %[alpha_mask], %[alpha_mask], 16 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"ext %[temp0], %[alpha_mask], 0, 8 \n\t"
"srl %[alpha_mask], %[alpha_mask], 8 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
: [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
:
);
return (alpha_mask != 0xff);
}
static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
int x;
const uint32_t c_00ffffff = 0x00ffffffu;
const uint32_t c_ff000000 = 0xff000000u;
const uint32_t c_8000000 = 0x00800000u;
const uint32_t c_8000080 = 0x00800080u;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
int temp0, temp1, temp2, temp3, alpha;
__asm__ volatile (
"srl %[alpha], %[argb], 24 \n\t"
"replv.qb %[temp0], %[alpha] \n\t"
"and %[temp0], %[temp0], %[c_00ffffff] \n\t"
"beqz %[inverse], 0f \n\t"
"divu $zero, %[c_ff000000], %[alpha] \n\t"
"mflo %[temp0] \n\t"
"0: \n\t"
"andi %[temp1], %[argb], 0xff \n\t"
"ext %[temp2], %[argb], 8, 8 \n\t"
"ext %[temp3], %[argb], 16, 8 \n\t"
"mul %[temp1], %[temp1], %[temp0] \n\t"
"mul %[temp2], %[temp2], %[temp0] \n\t"
"mul %[temp3], %[temp3], %[temp0] \n\t"
"precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
"addu %[temp3], %[temp3], %[c_8000000] \n\t"
"addu %[temp1], %[temp1], %[c_8000080] \n\t"
"precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
"precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
: [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
[c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
[c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
: "memory", "hi", "lo"
);
ptr[x] = temp1;
}
}
}
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
WebPDispatchAlpha = DispatchAlpha;
WebPMultARGBRow = MultARGBRow;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

View File

@ -1,298 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
//------------------------------------------------------------------------------
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i zero = _mm_setzero_si128();
const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'dst[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~7;
for (j = 0; j < height; ++j) {
__m128i* out = (__m128i*)dst;
for (i = 0; i < limit; i += 8) {
// load 8 alpha bytes
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
// load 8 dst pixels (32 bytes)
const __m128i b0_lo = _mm_loadu_si128(out + 0);
const __m128i b0_hi = _mm_loadu_si128(out + 1);
// mask dst alpha values
const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
// combine
const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
// store
_mm_storeu_si128(out + 0, b2_lo);
_mm_storeu_si128(out + 1, b2_hi);
// accumulate eight alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, a0);
out += 2;
}
for (; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_and &= alpha_value;
}
alpha += alpha_stride;
dst += dst_stride;
}
  // Combine the eight alpha 'and' into an 8-bit mask.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and != 0xff);
}
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
const __m128i zero = _mm_setzero_si128();
const int limit = width & ~15;
for (j = 0; j < height; ++j) {
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
}
for (; i < width; ++i) dst[i] = alpha[i] << 8;
alpha += alpha_stride;
dst += dst_stride;
}
}
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'src[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~7;
for (j = 0; j < height; ++j) {
const __m128i* src = (const __m128i*)argb;
for (i = 0; i < limit; i += 8) {
// load 32 argb bytes
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i b0 = _mm_and_si128(a0, a_mask);
const __m128i b1 = _mm_and_si128(a1, a_mask);
const __m128i c0 = _mm_packs_epi32(b0, b1);
const __m128i d0 = _mm_packus_epi16(c0, c0);
// store
_mm_storel_epi64((__m128i*)&alpha[i], d0);
// accumulate eight alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, d0);
src += 2;
}
for (; i < width; ++i) {
const uint32_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_and &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
  // Combine the eight alpha 'and' into an 8-bit mask.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and == 0xff);
}
//------------------------------------------------------------------------------
// Non-dither premultiplied modes
#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
// We can't use a 'const int' for the SHUFFLE value, because it has to be an
// immediate in the _mm_shufflexx_epi16() instruction. We really need a macro here.
#define APPLY_ALPHA(RGBX, SHUFFLE, MASK, MULT) do { \
const __m128i argb0 = _mm_loadl_epi64((__m128i*)&(RGBX)); \
const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero); \
const __m128i alpha0 = _mm_and_si128(argb1, MASK); \
const __m128i alpha1 = _mm_shufflelo_epi16(alpha0, SHUFFLE); \
const __m128i alpha2 = _mm_shufflehi_epi16(alpha1, SHUFFLE); \
/* alpha2 = [0 a0 a0 a0][0 a1 a1 a1] */ \
const __m128i scale0 = _mm_mullo_epi16(alpha2, MULT); \
const __m128i scale1 = _mm_mulhi_epu16(alpha2, MULT); \
const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0); \
const __m128i argb3 = _mm_mullo_epi16(argb1, scale1); \
const __m128i argb4 = _mm_adds_epu16(argb2, argb3); \
const __m128i argb5 = _mm_srli_epi16(argb4, 7); \
const __m128i argb6 = _mm_or_si128(argb5, alpha0); \
const __m128i argb7 = _mm_packus_epi16(argb6, zero); \
_mm_storel_epi64((__m128i*)&(RGBX), argb7); \
} while (0)
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
const __m128i zero = _mm_setzero_si128();
const int kSpan = 2;
const int w2 = w & ~(kSpan - 1);
while (h-- > 0) {
uint32_t* const rgbx = (uint32_t*)rgba;
int i;
if (!alpha_first) {
const __m128i kMask = _mm_set_epi16(0xff, 0, 0, 0, 0xff, 0, 0, 0);
const __m128i kMult =
_mm_set_epi16(0, 0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081);
for (i = 0; i < w2; i += kSpan) {
APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 3, 3, 3), kMask, kMult);
}
} else {
const __m128i kMask = _mm_set_epi16(0, 0, 0, 0xff, 0, 0, 0, 0xff);
const __m128i kMult =
_mm_set_epi16(0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081, 0);
for (i = 0; i < w2; i += kSpan) {
APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 0, 0, 3), kMask, kMult);
}
}
// Finish with left-overs.
for (; i < w; ++i) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
const uint32_t a = alpha[4 * i];
if (a != 0xff) {
const uint32_t mult = MULTIPLIER(a);
rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
}
}
rgba += stride;
}
}
#undef MULTIPLIER
#undef PREMULTIPLY
// -----------------------------------------------------------------------------
// Apply alpha value to rows
// We use: kINV255 = (1 << 24) / 255 = 0x010101
// So: a * kINV255 = (a << 16) | [(a << 8) | a]
// -> _mm_mulhi_epu16() takes care of the (a<<16) part,
// and _mm_mullo_epi16(a * 0x0101,...) takes care of the "(a << 8) | a" one.
static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
int x = 0;
if (!inverse) {
const int kSpan = 2;
const __m128i zero = _mm_setzero_si128();
const __m128i kRound =
_mm_set_epi16(0, 1 << 7, 1 << 7, 1 << 7, 0, 1 << 7, 1 << 7, 1 << 7);
const __m128i kMult =
_mm_set_epi16(0, 0x0101, 0x0101, 0x0101, 0, 0x0101, 0x0101, 0x0101);
const __m128i kOne64 = _mm_set_epi16(1u << 8, 0, 0, 0, 1u << 8, 0, 0, 0);
const int w2 = width & ~(kSpan - 1);
for (x = 0; x < w2; x += kSpan) {
const __m128i argb0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero);
const __m128i tmp0 = _mm_shufflelo_epi16(argb1, _MM_SHUFFLE(3, 3, 3, 3));
const __m128i tmp1 = _mm_shufflehi_epi16(tmp0, _MM_SHUFFLE(3, 3, 3, 3));
const __m128i tmp2 = _mm_srli_epi64(tmp1, 16);
const __m128i scale0 = _mm_mullo_epi16(tmp1, kMult);
const __m128i scale1 = _mm_or_si128(tmp2, kOne64);
const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0);
const __m128i argb3 = _mm_mullo_epi16(argb1, scale1);
const __m128i argb4 = _mm_adds_epu16(argb2, argb3);
const __m128i argb5 = _mm_adds_epu16(argb4, kRound);
const __m128i argb6 = _mm_srli_epi16(argb5, 8);
const __m128i argb7 = _mm_packus_epi16(argb6, zero);
_mm_storel_epi64((__m128i*)&ptr[x], argb7);
}
}
width -= x;
if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
}
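The comment before MultARGBRow() packs a small derivation into two lines. Spelled out per colour channel, and under the same non-inverse assumption, the arithmetic reduces to the scalar form below (an illustrative model, not the code that actually gets dispatched; the helper name PremulChannel is invented):

// x and a are 8-bit channel and alpha values.
static unsigned PremulChannel(unsigned x, unsigned a) {
  // a * kINV255 == (a << 16) | (a << 8) | a, with kINV255 = (1 << 24) / 255.
  const unsigned hi = x * a;                      // the (a << 16) contribution, already shifted down by 16
  const unsigned lo = (x * (a * 0x0101u)) >> 16;  // the "(a << 8) | a" part, i.e. what _mm_mulhi_epu16 keeps
  return (hi + lo + (1u << 7)) >> 8;              // remaining 24 - 16 = 8 bits of shift, with rounding
}
// The result approximates (x * a * 0x010101 + (1 << 23)) >> 24, i.e. x * a / 255 rounded.

In the SSE2 loop above, the lo term appears to correspond to the _mm_mulhi_epu16(argb1, scale0) product and the hi term to the _mm_mullo_epi16(argb1, scale1) one, with the alpha lane handled separately through kOne64.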
static void MultRow(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
int x = 0;
if (!inverse) {
const int kSpan = 8;
const __m128i zero = _mm_setzero_si128();
const __m128i kRound = _mm_set1_epi16(1 << 7);
const int w2 = width & ~(kSpan - 1);
for (x = 0; x < w2; x += kSpan) {
const __m128i v0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
const __m128i v1 = _mm_unpacklo_epi8(v0, zero);
const __m128i alpha0 = _mm_loadl_epi64((const __m128i*)&alpha[x]);
const __m128i alpha1 = _mm_unpacklo_epi8(alpha0, zero);
const __m128i alpha2 = _mm_unpacklo_epi8(alpha0, alpha0);
const __m128i v2 = _mm_mulhi_epu16(v1, alpha2);
const __m128i v3 = _mm_mullo_epi16(v1, alpha1);
const __m128i v4 = _mm_adds_epu16(v2, v3);
const __m128i v5 = _mm_adds_epu16(v4, kRound);
const __m128i v6 = _mm_srli_epi16(v5, 8);
const __m128i v7 = _mm_packus_epi16(v6, zero);
_mm_storel_epi64((__m128i*)&ptr[x], v7);
}
}
width -= x;
if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
WebPMultARGBRow = MultARGBRow;
WebPMultRow = MultRow;
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
WebPDispatchAlpha = DispatchAlpha;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
WebPExtractAlpha = ExtractAlpha;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE2)
#endif // WEBP_USE_SSE2

View File

@ -1,92 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel, SSE4.1 variant.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
//------------------------------------------------------------------------------
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i all_0xff = _mm_set1_epi32(~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'src[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~15;
const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 12, 8, 4, 0);
const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
12, 8, 4, 0, -1, -1, -1, -1);
const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
-1, -1, -1, -1, -1, -1, -1, -1);
const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
for (j = 0; j < height; ++j) {
const __m128i* src = (const __m128i*)argb;
for (i = 0; i < limit; i += 16) {
// load 64 argb bytes
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i a2 = _mm_loadu_si128(src + 2);
const __m128i a3 = _mm_loadu_si128(src + 3);
const __m128i b0 = _mm_shuffle_epi8(a0, kCstAlpha0);
const __m128i b1 = _mm_shuffle_epi8(a1, kCstAlpha1);
const __m128i b2 = _mm_shuffle_epi8(a2, kCstAlpha2);
const __m128i b3 = _mm_shuffle_epi8(a3, kCstAlpha3);
const __m128i c0 = _mm_or_si128(b0, b1);
const __m128i c1 = _mm_or_si128(b2, b3);
const __m128i d0 = _mm_or_si128(c0, c1);
// store
_mm_storeu_si128((__m128i*)&alpha[i], d0);
// accumulate sixteen alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, d0);
src += 4;
}
for (; i < width; ++i) {
const uint32_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_and &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
  // Combine the sixteen alpha 'and' into a 16-bit mask.
alpha_and |= 0xff00u; // pretend the upper bits [8..15] were tested ok.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and == 0xffffu);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
WebPExtractAlpha = ExtractAlpha;
}
#else // !WEBP_USE_SSE41
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE41)
#endif // WEBP_USE_SSE41

View File

@ -1,68 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions.
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
int i;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
}
}
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int i, offset = 0;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
offset += step;
}
}
void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
const uint8_t*, int, uint32_t*);
void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
int, int, uint32_t*);
extern void VP8EncDspARGBInitMIPSdspR2(void);
extern void VP8EncDspARGBInitSSE2(void);
static volatile VP8CPUInfo argb_last_cpuinfo_used =
(VP8CPUInfo)&argb_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8PackARGB = PackARGB;
VP8PackRGB = PackRGB;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspARGBInitSSE2();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8EncDspARGBInitMIPSdspR2();
}
#endif
}
argb_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -1,110 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions (mips version).
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
int temp0, temp1, temp2, temp3, offset;
const int rest = len & 1;
const uint32_t* const loop_end = out + len - rest;
const int step = 4;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int temp0, temp1, temp2, offset;
const int rest = len & 1;
const int a = 0xff;
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspARGBInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
VP8PackARGB = PackARGB;
VP8PackRGB = PackRGB;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

View File

@ -1,67 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions (SSE2 version).
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <string.h>
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
if (g == r + 1) { // RGBA input order. Need to swap R and B.
int i = 0;
const int len_max = len & ~3; // max length processed in main loop
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
assert(b == r + 2);
assert(a == r + 3);
for (; i < len_max; i += 4) {
const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
const __m128i B = _mm_and_si128(A, red_blue_mask); // R 0 B 0
const __m128i C = _mm_andnot_si128(red_blue_mask, A); // 0 G 0 A
const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1));
const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1));
const __m128i F = _mm_or_si128(E, C);
_mm_storeu_si128((__m128i*)(out + i), F);
}
for (; i < len; ++i) {
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
}
} else {
assert(g == b + 1);
assert(r == b + 2);
assert(a == b + 3);
memcpy(out, b, len * 4);
}
}
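For the RGBA branch above, the and/andnot masks plus the two 16-bit shuffles amount to swapping the R and B bytes inside each 32-bit pixel. A scalar model of that swap for a single pixel (illustrative only; SwapRB is an invented name):

#include <stdint.h>
// rgba_le is the 32-bit value obtained by a little-endian load of the bytes
// r, g, b, a, i.e. 0xAABBGGRR; the function returns 0xAARRGGBB.
static uint32_t SwapRB(uint32_t rgba_le) {
  const uint32_t rb = rgba_le & 0x00ff00ffu;            // keep R and B:  0x00BB00RR
  const uint32_t ga = rgba_le & 0xff00ff00u;            // keep G and A:  0xAA00GG00
  const uint32_t rb_swapped = (rb >> 16) | (rb << 16);  // swap 16-bit halves: 0x00RR00BB
  return ga | rb_swapped;                               // recombine:     0xAARRGGBB
}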
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspARGBInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
VP8PackARGB = PackARGB;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
#endif // WEBP_USE_SSE2

View File

@ -1,412 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "../enc/cost.h"
//------------------------------------------------------------------------------
// Boolean-cost cost table
const uint16_t VP8EntropyCost[256] = {
1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951,
939, 911, 896, 878, 871, 854, 838, 820, 811, 794,
786, 768, 768, 752, 740, 732, 720, 709, 704, 690,
683, 672, 666, 655, 647, 640, 631, 622, 615, 607,
598, 592, 586, 576, 572, 564, 559, 555, 547, 541,
534, 528, 522, 512, 512, 504, 500, 494, 488, 483,
477, 473, 467, 461, 458, 452, 448, 443, 438, 434,
427, 424, 419, 415, 410, 406, 403, 399, 394, 390,
384, 384, 377, 374, 370, 366, 362, 359, 355, 351,
347, 342, 342, 336, 333, 330, 326, 323, 320, 316,
312, 308, 305, 302, 299, 296, 293, 288, 287, 283,
280, 277, 274, 272, 268, 266, 262, 256, 256, 256,
251, 248, 245, 242, 240, 237, 234, 232, 228, 226,
223, 221, 218, 216, 214, 211, 208, 205, 203, 201,
198, 196, 192, 191, 188, 187, 183, 181, 179, 176,
175, 171, 171, 168, 165, 163, 160, 159, 156, 154,
152, 150, 148, 146, 144, 142, 139, 138, 135, 133,
131, 128, 128, 125, 123, 121, 119, 117, 115, 113,
111, 110, 107, 105, 103, 102, 100, 98, 96, 94,
92, 91, 89, 86, 86, 83, 82, 80, 77, 76,
74, 73, 71, 69, 67, 66, 64, 63, 61, 59,
57, 55, 54, 52, 51, 49, 47, 46, 44, 43,
41, 40, 38, 36, 35, 33, 32, 30, 29, 27,
25, 24, 22, 21, 19, 18, 16, 15, 13, 12,
10, 9, 7, 6, 4, 3
};
//------------------------------------------------------------------------------
// Level cost tables
// fixed costs for coding levels, deduced from the coding tree.
// This is only the part that doesn't depend on the probability state.
const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
0, 256, 256, 256, 256, 432, 618, 630,
731, 640, 640, 828, 901, 948, 1021, 1101,
1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
};
//------------------------------------------------------------------------------
// Tables for level coding
const uint8_t VP8EncBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // sentinel
};
//------------------------------------------------------------------------------
// Mode costs
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
if (res->last < 0) {
return VP8BitCost(0, p0);
}
for (; n < res->last; ++n) {
const int v = abs(res->coeffs[n]);
const int ctx = (v >= 2) ? 2 : v;
cost += VP8LevelCost(t, v);
t = costs[n + 1][ctx];
}
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = (v == 1) ? 1 : 2;
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
return cost;
}
static void SetResidualCoeffs(const int16_t* const coeffs,
VP8Residual* const res) {
int n;
res->last = -1;
assert(res->first == 0 || coeffs[0] == 0);
for (n = 15; n >= 0; --n) {
if (coeffs[n]) {
res->last = n;
break;
}
}
res->coeffs = coeffs;
}
//------------------------------------------------------------------------------
// init function
VP8GetResidualCostFunc VP8GetResidualCost;
VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
extern void VP8EncDspCostInitMIPS32(void);
extern void VP8EncDspCostInitMIPSdspR2(void);
extern void VP8EncDspCostInitSSE2(void);
static volatile VP8CPUInfo cost_last_cpuinfo_used =
(VP8CPUInfo)&cost_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8GetResidualCost = GetResidualCost;
VP8SetResidualCoeffs = SetResidualCoeffs;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspCostInitMIPS32();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8EncDspCostInitMIPSdspR2();
}
#endif
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspCostInitSSE2();
}
#endif
}
cost_last_cpuinfo_used = VP8GetCPUInfo;
}
//------------------------------------------------------------------------------

View File

@ -1,154 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "../enc/cost.h"
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int temp0, temp1;
int v_reg, ctx_reg;
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
int p0 = res->prob[n][ctx0][0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
const int16_t* res_coeffs = res->coeffs;
const int res_last = res->last;
const int const_max_level = MAX_VARIABLE_LEVEL;
const int const_2 = 2;
const uint16_t** p_costs = &costs[n][0];
const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs);
if (res->last < 0) {
return VP8BitCost(0, p0);
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"sll %[temp0], %[n], 1 \n\t"
"blez %[temp1], 2f \n\t"
" addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t"
"1: \n\t"
"lh %[v_reg], 0(%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"negu %[temp0], %[v_reg] \n\t"
"slti %[temp1], %[v_reg], 0 \n\t"
"movn %[v_reg], %[temp0], %[temp1] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t"
"lhu %[temp1], 0(%[temp1]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"addu %[v_reg], %[v_reg], %[t] \n\t"
"lhu %[temp0], 0(%[v_reg]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: \n\t"
".set pop \n\t"
: [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
[ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
: [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
[inc_p_costs]"r"(inc_p_costs)
: "memory"
);
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = (v == 1) ? 1 : 2;
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
return cost;
}
static void SetResidualCoeffs(const int16_t* const coeffs,
VP8Residual* const res) {
const int16_t* p_coeffs = (int16_t*)coeffs;
int temp0, temp1, temp2, n, n1;
assert(res->first == 0 || coeffs[0] == 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[p_coeffs], %[p_coeffs], 28 \n\t"
"li %[n], 15 \n\t"
"li %[temp2], -1 \n\t"
"0: \n\t"
"ulw %[temp0], 0(%[p_coeffs]) \n\t"
"beqz %[temp0], 1f \n\t"
#if defined(WORDS_BIGENDIAN)
" sll %[temp1], %[temp0], 16 \n\t"
#else
" srl %[temp1], %[temp0], 16 \n\t"
#endif
"addiu %[n1], %[n], -1 \n\t"
"movz %[temp0], %[n1], %[temp1] \n\t"
"movn %[temp0], %[n], %[temp1] \n\t"
"j 2f \n\t"
" addiu %[temp2], %[temp0], 0 \n\t"
"1: \n\t"
"addiu %[n], %[n], -2 \n\t"
"bgtz %[n], 0b \n\t"
" addiu %[p_coeffs], %[p_coeffs], -4 \n\t"
"2: \n\t"
".set pop \n\t"
: [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[n]"=&r"(n), [n1]"=&r"(n1)
:
: "memory"
);
res->last = temp2;
res->coeffs = coeffs;
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspCostInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
VP8GetResidualCost = GetResidualCost;
VP8SetResidualCoeffs = SetResidualCoeffs;
}
#else // !WEBP_USE_MIPS32
WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPS32)
#endif // WEBP_USE_MIPS32

View File

@ -1,107 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "../enc/cost.h"
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int temp0, temp1;
int v_reg, ctx_reg;
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
int p0 = res->prob[n][ctx0][0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
const int16_t* res_coeffs = res->coeffs;
const int res_last = res->last;
const int const_max_level = MAX_VARIABLE_LEVEL;
const int const_2 = 2;
const uint16_t** p_costs = &costs[n][0];
const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs);
if (res->last < 0) {
return VP8BitCost(0, p0);
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"blez %[temp1], 2f \n\t"
" nop \n\t"
"1: \n\t"
"sll %[temp0], %[n], 1 \n\t"
"lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"absq_s.w %[v_reg], %[v_reg] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"lhx %[temp0], %[v_reg](%[t]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: \n\t"
".set pop \n\t"
: [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
[ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1)
: [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
[res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs)
: "memory"
);
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = (v == 1) ? 1 : 2;
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
return cost;
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspCostInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
VP8GetResidualCost = GetResidualCost;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

View File

@ -1,119 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 version of cost functions
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include "../enc/cost.h"
#include "../enc/vp8enci.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
VP8Residual* const res) {
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
// Use SSE2 to compare 16 values with a single instruction.
const __m128i zero = _mm_setzero_si128();
const __m128i m0 = _mm_packs_epi16(c0, c1);
const __m128i m1 = _mm_cmpeq_epi8(m0, zero);
  // Get the comparison results as a 16-bit bitmask and invert it, so that the
  // set bits mark the entries that are not equal to zero. We don't need to mask
// out least significant bits according to res->first, since coeffs[0] is 0
// if res->first > 0.
const uint32_t mask = 0x0000ffffu ^ (uint32_t)_mm_movemask_epi8(m1);
// The position of the most significant non-zero bit indicates the position of
// the last non-zero value.
assert(res->first == 0 || coeffs[0] == 0);
res->last = mask ? BitsLog2Floor(mask) : -1;
res->coeffs = coeffs;
}
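The bitmask trick in SetResidualCoeffsSSE2() can be stated in scalar form: set bit n of a 16-bit mask when coeffs[n] is non-zero, then the index of the highest set bit is the position of the last non-zero coefficient (or -1 if there is none). A standalone sketch, using a portable loop in place of BitsLog2Floor() (LastNonZero is an invented name):

#include <stdint.h>
static int LastNonZero(const int16_t coeffs[16]) {
  uint32_t mask = 0;
  int n, last = -1;
  for (n = 0; n < 16; ++n) {
    if (coeffs[n] != 0) mask |= 1u << n;  // same information as the inverted movemask above
  }
  while (mask != 0) {                     // portable stand-in for BitsLog2Floor(mask)
    mask >>= 1;
    ++last;
  }
  return last;                            // -1 when all 16 coefficients are zero
}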
static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
uint8_t levels[16], ctxs[16];
uint16_t abs_levels[16];
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
if (res->last < 0) {
return VP8BitCost(0, p0);
}
{ // precompute clamped levels and contexts, packed to 8b.
const __m128i zero = _mm_setzero_si128();
const __m128i kCst2 = _mm_set1_epi8(2);
const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
const __m128i c0 = _mm_loadu_si128((const __m128i*)&res->coeffs[0]);
const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);
const __m128i D0 = _mm_sub_epi16(zero, c0);
const __m128i D1 = _mm_sub_epi16(zero, c1);
const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b
const __m128i E1 = _mm_max_epi16(c1, D1);
const __m128i F = _mm_packs_epi16(E0, E1);
const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,2
const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]
_mm_storeu_si128((__m128i*)&ctxs[0], G);
_mm_storeu_si128((__m128i*)&levels[0], H);
_mm_storeu_si128((__m128i*)&abs_levels[0], E0);
_mm_storeu_si128((__m128i*)&abs_levels[8], E1);
}
for (; n < res->last; ++n) {
const int ctx = ctxs[n];
const int level = levels[n];
const int flevel = abs_levels[n]; // full level
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
t = costs[n + 1][ctx];
}
// Last coefficient is always non-zero
{
const int level = levels[n];
const int flevel = abs_levels[n];
assert(flevel != 0);
cost += VP8LevelFixedCosts[flevel] + t[level];
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = ctxs[n];
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
return cost;
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspCostInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
VP8GetResidualCost = GetResidualCostSSE2;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)
#endif // WEBP_USE_SSE2

View File

@ -13,10 +13,14 @@
#include "./dsp.h"
#if defined(WEBP_ANDROID_NEON)
#if defined(__ANDROID__)
#include <cpu-features.h>
#endif
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// SSE2 detection.
//
@ -29,78 +33,22 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type), "c"(0));
}
#elif defined(__x86_64__) && \
(defined(__code_model_medium__) || defined(__code_model_large__)) && \
defined(__PIC__)
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"xchg{q}\t{%%rbx}, %q1\n"
"cpuid\n"
"xchg{q}\t{%%rbx}, %q1\n"
: "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
: "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type), "c"(0));
: "a"(info_type));
}
#elif (defined(_M_X64) || defined(_M_IX86)) && \
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
#include <intrin.h>
#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
#elif defined(WEBP_MSC_SSE2)
#define GetCPUInfo __cpuid
#endif
// NaCl has no support for xgetbv or the raw opcode.
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
static WEBP_INLINE uint64_t xgetbv(void) {
const uint32_t ecx = 0;
uint32_t eax, edx;
// Use the raw opcode for xgetbv for compatibility with older toolchains.
__asm__ volatile (
".byte 0x0f, 0x01, 0xd0\n"
: "=a"(eax), "=d"(edx) : "c" (ecx));
return ((uint64_t)edx << 32) | eax;
}
#elif (defined(_M_X64) || defined(_M_IX86)) && \
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
#elif defined(_MSC_VER) && defined(_M_IX86)
static WEBP_INLINE uint64_t xgetbv(void) {
uint32_t eax_, edx_;
__asm {
xor ecx, ecx // ecx = 0
// Use the raw opcode for xgetbv for compatibility with older toolchains.
__asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
mov eax_, eax
mov edx_, edx
}
return ((uint64_t)edx_ << 32) | eax_;
}
#else
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
static int x86CPUInfo(CPUFeature feature) {
int max_cpuid_value;
int cpu_info[4];
// get the highest feature value cpuid supports
GetCPUInfo(cpu_info, 0);
max_cpuid_value = cpu_info[0];
if (max_cpuid_value < 1) {
return 0;
}
GetCPUInfo(cpu_info, 1);
if (feature == kSSE2) {
return 0 != (cpu_info[3] & 0x04000000);
@ -108,26 +56,10 @@ static int x86CPUInfo(CPUFeature feature) {
if (feature == kSSE3) {
return 0 != (cpu_info[2] & 0x00000001);
}
if (feature == kSSE4_1) {
return 0 != (cpu_info[2] & 0x00080000);
}
if (feature == kAVX) {
// bits 27 (OSXSAVE) & 28 (256-bit AVX)
if ((cpu_info[2] & 0x18000000) == 0x18000000) {
// XMM state and YMM state enabled by the OS.
return (xgetbv() & 0x6) == 0x6;
}
}
if (feature == kAVX2) {
if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
GetCPUInfo(cpu_info, 7);
return ((cpu_info[1] & 0x00000020) == 0x00000020);
}
}
return 0;
}
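// For clarity on the kAVX test above: xgetbv() returns XCR0, whose bit 1 means
// the OS saves XMM (SSE) state and bit 2 means it saves YMM (AVX) state. A
// minimal helper expressing just that check could look like the sketch below
// (hypothetical name, illustration only):
//
//   static WEBP_INLINE int OSSavesYMM(void) {
//     return (xgetbv() & 0x6) == 0x6;  // XMM and YMM state both OS-managed
//   }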
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
#elif defined(WEBP_ANDROID_NEON)
static int AndroidCPUInfo(CPUFeature feature) {
const AndroidCpuFamily cpu_family = android_getCpuFamily();
const uint64_t cpu_features = android_getCpuFeatures();
@ -138,7 +70,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
return 0;
}
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
#elif defined(WEBP_USE_NEON)
#elif defined(__ARM_NEON__)
// define a dummy function to enable turning off NEON at runtime by setting
// VP8DecGetCPUInfo = NULL
static int armCPUInfo(CPUFeature feature) {
@ -146,17 +78,10 @@ static int armCPUInfo(CPUFeature feature) {
return 1;
}
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2)
static int mipsCPUInfo(CPUFeature feature) {
if ((feature == kMIPS32) || (feature == kMIPSdspR2)) {
return 1;
} else {
return 0;
}
}
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else
VP8CPUInfo VP8GetCPUInfo = NULL;
#endif
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -7,14 +7,49 @@
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical decoding functions, default plain-C implementations.
// Speed-critical decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "../dec/vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// run-time tables (~4k)
static uint8_t abs0[255 + 255 + 1]; // abs(i)
static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1
static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127]
static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15]
static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
static void DspInitTables(void) {
if (!tables_ok) {
int i;
for (i = -255; i <= 255; ++i) {
abs0[255 + i] = (i < 0) ? -i : i;
abs1[255 + i] = abs0[255 + i] >> 1;
}
for (i = -1020; i <= 1020; ++i) {
sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
}
for (i = -112; i <= 112; ++i) {
sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
}
for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
}
tables_ok = 1;
}
}
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
@ -26,16 +61,9 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
#define STORE(x, y, v) \
dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
#define STORE2(y, dc, d, c) do { \
const int DC = (dc); \
STORE(0, y, DC + (d)); \
STORE(1, y, DC + (c)); \
STORE(2, y, DC - (c)); \
STORE(3, y, DC - (d)); \
} while (0)
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
#define MUL2(a) (((a) * 35468) >> 16)
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
#define MUL(a, b) (((a) * (b)) >> 16)
static void TransformOne(const int16_t* in, uint8_t* dst) {
int C[4 * 4], *tmp;
@ -44,8 +72,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
for (i = 0; i < 4; ++i) { // vertical pass
const int a = in[0] + in[8]; // [-4096, 4094]
const int b = in[0] - in[8]; // [-4095, 4095]
const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783]
const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781]
const int c = MUL(in[4], kC2) - MUL(in[12], kC1); // [-3783, 3783]
const int d = MUL(in[4], kC1) + MUL(in[12], kC2); // [-3785, 3781]
tmp[0] = a + d; // [-7881, 7875]
tmp[1] = b + c; // [-7878, 7878]
tmp[2] = b - c; // [-7878, 7878]
@ -54,7 +82,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
in++;
}
// Each pass is expanding the dynamic range by ~3.85 (upper bound).
// The exact value is (2. + (20091 + 35468) / 65536).
// The exact value is (2. + (kC1 + kC2) / 65536).
// After the second pass, maximum interval is [-3794, 3794], assuming
// an input in [-2048, 2047] interval. We then need to add a dst value
// in the [0, 255] range.
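// (Checking that bound with the old constants kC1 = 20091 + (1 << 16) and
//  kC2 = 35468: the two fixed-point factors are 85627 / 65536 ~ 1.3066 and
//  35468 / 65536 ~ 0.5412, so each pass grows the range by at most
//  2 + 1.3066 + 0.5412 ~ 3.85, matching the ~3.85 figure quoted here.)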
@ -65,8 +93,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
const int dc = tmp[0] + 4;
const int a = dc + tmp[8];
const int b = dc - tmp[8];
const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
STORE(0, 0, a + d);
STORE(1, 0, b + c);
STORE(2, 0, b - c);
@ -75,22 +103,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
dst += BPS;
}
}
// Simplified transform when only in[0], in[1] and in[4] are non-zero
static void TransformAC3(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
const int c4 = MUL2(in[4]);
const int d4 = MUL1(in[4]);
const int c1 = MUL2(in[1]);
const int d1 = MUL1(in[1]);
STORE2(0, a + d4, d1, c1);
STORE2(1, a + c4, d1, c1);
STORE2(2, a - c4, d1, c1);
STORE2(3, a - d4, d1, c1);
}
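// (For clarity: with only in[0], in[1] and in[4] non-zero, the vertical pass
//  reduces to a per-row constant, so for each row y the code adds
//  (dc + d1, dc + c1, dc - c1, dc - d1) >> 3 to the four predicted pixels,
//  where dc is one of a + d4, a + c4, a - c4, a - d4 as passed to STORE2.)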
#undef MUL1
#undef MUL2
#undef STORE2
#undef MUL
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
@ -104,7 +117,7 @@ static void TransformUV(const int16_t* in, uint8_t* dst) {
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
}
static void TransformDC(const int16_t* in, uint8_t* dst) {
static void TransformDC(const int16_t *in, uint8_t* dst) {
const int DC = in[0] + 4;
int i, j;
for (j = 0; j < 4; ++j) {
@ -115,10 +128,10 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
}
static void TransformDCUV(const int16_t* in, uint8_t* dst) {
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
if (in[0 * 16]) TransformDC(in + 0 * 16, dst);
if (in[1 * 16]) TransformDC(in + 1 * 16, dst + 4);
if (in[2 * 16]) TransformDC(in + 2 * 16, dst + 4 * BPS);
if (in[3 * 16]) TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
}
#undef STORE
@ -153,16 +166,16 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
}
}
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT;
//------------------------------------------------------------------------------
// Intra predictions
#define DST(x, y) dst[(x) + (y) * BPS]
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
const uint8_t* top = dst - BPS;
const uint8_t* const clip0 = VP8kclip1 - top[-1];
const uint8_t* const clip0 = clip1 + 255 - top[-1];
int y;
for (y = 0; y < size; ++y) {
const uint8_t* const clip = clip0 + dst[-1];
@ -173,21 +186,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
dst += BPS;
}
}
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
static void TM4(uint8_t *dst) { TrueMotion(dst, 4); }
static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t *dst) { TrueMotion(dst, 16); }
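// (Note on the pointer arithmetic in TrueMotion: the clip pointer is offset by
//  the top-left sample, so each output pixel clip[top[x]] evaluates to
//  clamp(dst[-1] + top[x] - top[-1], 0, 255), i.e. the usual TrueMotion
//  predictor left + above - above_left, clamped to 8 bits.)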
//------------------------------------------------------------------------------
// 16x16
static void VE16(uint8_t* dst) { // vertical
static void VE16(uint8_t *dst) { // vertical
int j;
for (j = 0; j < 16; ++j) {
memcpy(dst + j * BPS, dst - BPS, 16);
}
}
static void HE16(uint8_t* dst) { // horizontal
static void HE16(uint8_t *dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
memset(dst, dst[-1], 16);
@ -202,7 +215,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
}
}
static void DC16(uint8_t* dst) { // DC
static void DC16(uint8_t *dst) { // DC
int DC = 16;
int j;
for (j = 0; j < 16; ++j) {
@ -211,7 +224,7 @@ static void DC16(uint8_t* dst) { // DC
Put16(DC >> 5, dst);
}
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
static void DC16NoTop(uint8_t *dst) { // DC with top samples not available
int DC = 8;
int j;
for (j = 0; j < 16; ++j) {
@ -220,7 +233,7 @@ static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
Put16(DC >> 4, dst);
}
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
static void DC16NoLeft(uint8_t *dst) { // DC with left samples not available
int DC = 8;
int i;
for (i = 0; i < 16; ++i) {
@ -229,19 +242,17 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
Put16(DC >> 4, dst);
}
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
static void DC16NoTopLeft(uint8_t *dst) { // DC with no top and left samples
Put16(0x80, dst);
}
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// 4x4
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
static void VE4(uint8_t* dst) { // vertical
static void VE4(uint8_t *dst) { // vertical
const uint8_t* top = dst - BPS;
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
@ -255,19 +266,19 @@ static void VE4(uint8_t* dst) { // vertical
}
}
static void HE4(uint8_t* dst) { // horizontal
static void HE4(uint8_t *dst) { // horizontal
const int A = dst[-1 - BPS];
const int B = dst[-1];
const int C = dst[-1 + BPS];
const int D = dst[-1 + 2 * BPS];
const int E = dst[-1 + 3 * BPS];
WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C));
WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D));
WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E));
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
*(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C);
*(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D);
*(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E);
*(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E);
}
static void DC4(uint8_t* dst) { // DC
static void DC4(uint8_t *dst) { // DC
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@ -275,7 +286,7 @@ static void DC4(uint8_t* dst) { // DC
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
}
static void RD4(uint8_t* dst) { // Down-right
static void RD4(uint8_t *dst) { // Down-right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -286,15 +297,15 @@ static void RD4(uint8_t* dst) { // Down-right
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
DST(0, 3) = AVG3(J, K, L);
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
}
static void LD4(uint8_t* dst) { // Down-Left
static void LD4(uint8_t *dst) { // Down-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@ -307,12 +318,12 @@ static void LD4(uint8_t* dst) { // Down-Left
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
}
static void VR4(uint8_t* dst) { // Vertical-Right
static void VR4(uint8_t *dst) { // Vertical-Right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -334,7 +345,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
DST(3, 1) = AVG3(B, C, D);
}
static void VL4(uint8_t* dst) { // Vertical-Left
static void VL4(uint8_t *dst) { // Vertical-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@ -356,7 +367,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
DST(3, 3) = AVG3(F, G, H);
}
static void HU4(uint8_t* dst) { // Horizontal-Up
static void HU4(uint8_t *dst) { // Horizontal-Up
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -371,7 +382,7 @@ static void HU4(uint8_t* dst) { // Horizontal-Up
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4(uint8_t* dst) { // Horizontal-Down
static void HD4(uint8_t *dst) { // Horizontal-Down
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -398,19 +409,17 @@ static void HD4(uint8_t* dst) { // Horizontal-Down
#undef AVG3
#undef AVG2
VP8PredFunc VP8PredLuma4[NUM_BMODES];
//------------------------------------------------------------------------------
// Chroma
static void VE8uv(uint8_t* dst) { // vertical
static void VE8uv(uint8_t *dst) { // vertical
int j;
for (j = 0; j < 8; ++j) {
memcpy(dst + j * BPS, dst - BPS, 8);
}
}
static void HE8uv(uint8_t* dst) { // horizontal
static void HE8uv(uint8_t *dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
memset(dst, dst[-1], 8);
@ -421,12 +430,17 @@ static void HE8uv(uint8_t* dst) { // horizontal
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
int j;
#ifndef WEBP_REFERENCE_IMPLEMENTATION
const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
for (j = 0; j < 8; ++j) {
memset(dst + j * BPS, value, 8);
*(uint64_t*)(dst + j * BPS) = v;
}
#else
for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
#endif
}
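// (Worked example of the replication trick above: value = 0x80 gives
//  v = 0x8080808080808080, i.e. the byte copied into all eight lanes of the
//  64-bit store, which is equivalent to the memset form.)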
static void DC8uv(uint8_t* dst) { // DC
static void DC8uv(uint8_t *dst) { // DC
int dc0 = 8;
int i;
for (i = 0; i < 8; ++i) {
@ -435,7 +449,7 @@ static void DC8uv(uint8_t* dst) { // DC
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
@ -444,7 +458,7 @@ static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
@ -453,11 +467,26 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
Put8x8uv(0x80, dst);
}
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// default C implementations
const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
};
const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
DC16, TM16, VE16, HE16,
DC16NoTop, DC16NoLeft, DC16NoTopLeft
};
const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
DC8uv, TM8uv, VE8uv, HE8uv,
DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
};
//------------------------------------------------------------------------------
// Edge filtering functions
@ -465,62 +494,61 @@ VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
// 4 pixels in, 2 pixels out
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
const int a1 = sclip2[112 + ((a + 4) >> 3)];
const int a2 = sclip2[112 + ((a + 3) >> 3)];
p[-step] = clip1[255 + p0 + a2];
p[ 0] = clip1[255 + q0 - a1];
}
// 4 pixels in, 4 pixels out
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a1 = sclip2[112 + ((a + 4) >> 3)];
const int a2 = sclip2[112 + ((a + 3) >> 3)];
const int a3 = (a1 + 1) >> 1;
p[-2*step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
p[-2*step] = clip1[255 + p1 + a3];
p[- step] = clip1[255 + p0 + a2];
p[ 0] = clip1[255 + q0 - a1];
p[ step] = clip1[255 + q1 - a3];
}
// 6 pixels in, 6 pixels out
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]];
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3*step] = VP8kclip1[p2 + a3];
p[-2*step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2*step] = VP8kclip1[q2 - a3];
p[-3*step] = clip1[255 + p2 + a3];
p[-2*step] = clip1[255 + p1 + a2];
p[- step] = clip1[255 + p0 + a1];
p[ 0] = clip1[255 + q0 - a1];
p[ step] = clip1[255 + q1 - a2];
p[ 2*step] = clip1[255 + q2 - a3];
}
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
}
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
}
static WEBP_INLINE int needs_filter2(const uint8_t* p,
int step, int t, int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
const int p0 = p[-step], q0 = p[0];
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
return 0;
return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it &&
abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it &&
abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
}
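// (Reading of the two threshold forms above: the table-based test
//  2 * |p0 - q0| + (|p1 - q1| >> 1) <= thresh and the arithmetic test
//  4 * |p0 - q0| + |p1 - q1| <= 2 * thresh + 1 accept exactly the same pixels;
//  scaling by two and adding one to the threshold absorbs the floor of the
//  former's >> 1, which is what lets the abs1[] half-table be dropped.)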
//------------------------------------------------------------------------------
@ -528,9 +556,8 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i, stride, thresh2)) {
if (needs_filter(p + i, stride, thresh)) {
do_filter2(p + i, stride);
}
}
@ -538,9 +565,8 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i * stride, 1, thresh2)) {
if (needs_filter(p + i * stride, 1, thresh)) {
do_filter2(p + i * stride, 1);
}
}
@ -568,9 +594,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (needs_filter2(p, hstride, thresh, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
@ -584,9 +609,8 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (needs_filter2(p, hstride, thresh, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
@ -655,7 +679,6 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
//------------------------------------------------------------------------------
VP8DecIdct2 VP8Transform;
VP8DecIdct VP8TransformAC3;
VP8DecIdct VP8TransformUV;
VP8DecIdct VP8TransformDC;
VP8DecIdct VP8TransformDCUV;
@ -674,25 +697,15 @@ VP8SimpleFilterFunc VP8SimpleVFilter16i;
VP8SimpleFilterFunc VP8SimpleHFilter16i;
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
static volatile VP8CPUInfo dec_last_cpuinfo_used =
(VP8CPUInfo)&dec_last_cpuinfo_used;
void VP8DspInit(void) {
DspInitTables();
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8InitClipTables();
VP8TransformWHT = TransformWHT;
VP8Transform = TransformTwo;
VP8TransformUV = TransformUV;
VP8TransformDC = TransformDC;
VP8TransformDCUV = TransformDCUV;
VP8TransformAC3 = TransformAC3;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
@ -707,60 +720,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
VP8PredLuma4[0] = DC4;
VP8PredLuma4[1] = TM4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[3] = HE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[5] = VR4;
VP8PredLuma4[6] = LD4;
VP8PredLuma4[7] = VL4;
VP8PredLuma4[8] = HD4;
VP8PredLuma4[9] = HU4;
VP8PredLuma16[0] = DC16;
VP8PredLuma16[1] = TM16;
VP8PredLuma16[2] = VE16;
VP8PredLuma16[3] = HE16;
VP8PredLuma16[4] = DC16NoTop;
VP8PredLuma16[5] = DC16NoLeft;
VP8PredLuma16[6] = DC16NoTopLeft;
VP8PredChroma8[0] = DC8uv;
VP8PredChroma8[1] = TM8uv;
VP8PredChroma8[2] = VE8uv;
VP8PredChroma8[3] = HE8uv;
VP8PredChroma8[4] = DC8uvNoTop;
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredChroma8[6] = DC8uvNoTopLeft;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
if (VP8GetCPUInfo) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8DspInitSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8DspInitSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_NEON)
#elif defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8DspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8DspInitMIPS32();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8DspInitMIPSdspR2();
}
#endif
}
dec_last_cpuinfo_used = VP8GetCPUInfo;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,366 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Clipping tables for filtering
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#define USE_STATIC_TABLES // undefine to have run-time table initialization
#ifdef USE_STATIC_TABLES
static const uint8_t abs0[255 + 255 + 1] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
static const int8_t sclip1[1020 + 1020 + 1] = {
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
};
static const int8_t sclip2[112 + 112 + 1] = {
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
};
static const uint8_t clip1[255 + 511 + 1] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
#else
// uninitialized tables
static uint8_t abs0[255 + 255 + 1];
static int8_t sclip1[1020 + 1020 + 1];
static int8_t sclip2[112 + 112 + 1];
static uint8_t clip1[255 + 511 + 1];
// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
#endif
const int8_t* const VP8ksclip1 = &sclip1[1020];
const int8_t* const VP8ksclip2 = &sclip2[112];
const uint8_t* const VP8kclip1 = &clip1[255];
const uint8_t* const VP8kabs0 = &abs0[255];
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
#if !defined(USE_STATIC_TABLES)
int i;
if (!tables_ok) {
for (i = -255; i <= 255; ++i) {
abs0[255 + i] = (i < 0) ? -i : i;
}
for (i = -1020; i <= 1020; ++i) {
sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
}
for (i = -112; i <= 112; ++i) {
sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
}
for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
}
tables_ok = 1;
}
#endif // USE_STATIC_TABLES
}
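// Usage sketch for the exported tables above: the pointers are pre-offset so a
// possibly negative delta indexes them directly, e.g.
//   VP8kclip1[-10] == 0      (clamps to 0)
//   VP8kclip1[300] == 255    (clamps to 255)
//   VP8ksclip2[40] == 15     (clamps a value in [-112, 112] to [-16, 15])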

View File

@ -1,587 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of dsp functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "./mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
static WEBP_INLINE int abs_mips32(int x) {
const int sign = x >> 31;
return (x ^ sign) - sign;
}
// 4 pixels in, 2 pixels out
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
}
// 4 pixels in, 4 pixels out
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a3 = (a1 + 1) >> 1;
p[-2 * step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
}
// 6 pixels in, 6 pixels out
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3 * step] = VP8kclip1[p2 + a3];
p[-2 * step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2 * step] = VP8kclip1[q2 - a3];
}
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
}
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
}
static WEBP_INLINE int needs_filter2(const uint8_t* p,
int step, int t, int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step];
const int p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {
return 0;
}
return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
}
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
do_filter6(p, hstride);
}
}
p += vstride;
}
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
do_filter4(p, hstride);
}
}
p += vstride;
}
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i, stride, thresh2)) {
do_filter2(p + i, stride);
}
}
}
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i * stride, 1, thresh2)) {
do_filter2(p + i * stride, 1);
}
}
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
}
}
static void TransformOne(const int16_t* in, uint8_t* dst) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14;
int temp15, temp16, temp17, temp18;
int16_t* p_in = (int16_t*)in;
// loops unrolled and merged to avoid the use of a tmp buffer
// and to reduce the number of stalls. The MUL macro is written
// in assembly and inlined
__asm__ volatile(
"lh %[temp0], 0(%[in]) \n\t"
"lh %[temp8], 16(%[in]) \n\t"
"lh %[temp4], 8(%[in]) \n\t"
"lh %[temp12], 24(%[in]) \n\t"
"addu %[temp16], %[temp0], %[temp8] \n\t"
"subu %[temp0], %[temp0], %[temp8] \n\t"
"mul %[temp8], %[temp4], %[kC2] \n\t"
"mul %[temp17], %[temp12], %[kC1] \n\t"
"mul %[temp4], %[temp4], %[kC1] \n\t"
"mul %[temp12], %[temp12], %[kC2] \n\t"
"lh %[temp1], 2(%[in]) \n\t"
"lh %[temp5], 10(%[in]) \n\t"
"lh %[temp9], 18(%[in]) \n\t"
"lh %[temp13], 26(%[in]) \n\t"
"sra %[temp8], %[temp8], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp4], %[temp4], 16 \n\t"
"sra %[temp12], %[temp12], 16 \n\t"
"lh %[temp2], 4(%[in]) \n\t"
"lh %[temp6], 12(%[in]) \n\t"
"lh %[temp10], 20(%[in]) \n\t"
"lh %[temp14], 28(%[in]) \n\t"
"subu %[temp17], %[temp8], %[temp17] \n\t"
"addu %[temp4], %[temp4], %[temp12] \n\t"
"addu %[temp8], %[temp16], %[temp4] \n\t"
"subu %[temp4], %[temp16], %[temp4] \n\t"
"addu %[temp16], %[temp1], %[temp9] \n\t"
"subu %[temp1], %[temp1], %[temp9] \n\t"
"lh %[temp3], 6(%[in]) \n\t"
"lh %[temp7], 14(%[in]) \n\t"
"lh %[temp11], 22(%[in]) \n\t"
"lh %[temp15], 30(%[in]) \n\t"
"addu %[temp12], %[temp0], %[temp17] \n\t"
"subu %[temp0], %[temp0], %[temp17] \n\t"
"mul %[temp9], %[temp5], %[kC2] \n\t"
"mul %[temp17], %[temp13], %[kC1] \n\t"
"mul %[temp5], %[temp5], %[kC1] \n\t"
"mul %[temp13], %[temp13], %[kC2] \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"subu %[temp17], %[temp9], %[temp17] \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"addu %[temp5], %[temp5], %[temp13] \n\t"
"addu %[temp13], %[temp1], %[temp17] \n\t"
"subu %[temp1], %[temp1], %[temp17] \n\t"
"mul %[temp17], %[temp14], %[kC1] \n\t"
"mul %[temp14], %[temp14], %[kC2] \n\t"
"addu %[temp9], %[temp16], %[temp5] \n\t"
"subu %[temp5], %[temp16], %[temp5] \n\t"
"addu %[temp16], %[temp2], %[temp10] \n\t"
"subu %[temp2], %[temp2], %[temp10] \n\t"
"mul %[temp10], %[temp6], %[kC2] \n\t"
"mul %[temp6], %[temp6], %[kC1] \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp14], %[temp14], 16 \n\t"
"sra %[temp10], %[temp10], 16 \n\t"
"sra %[temp6], %[temp6], 16 \n\t"
"subu %[temp17], %[temp10], %[temp17] \n\t"
"addu %[temp6], %[temp6], %[temp14] \n\t"
"addu %[temp10], %[temp16], %[temp6] \n\t"
"subu %[temp6], %[temp16], %[temp6] \n\t"
"addu %[temp14], %[temp2], %[temp17] \n\t"
"subu %[temp2], %[temp2], %[temp17] \n\t"
"mul %[temp17], %[temp15], %[kC1] \n\t"
"mul %[temp15], %[temp15], %[kC2] \n\t"
"addu %[temp16], %[temp3], %[temp11] \n\t"
"subu %[temp3], %[temp3], %[temp11] \n\t"
"mul %[temp11], %[temp7], %[kC2] \n\t"
"mul %[temp7], %[temp7], %[kC1] \n\t"
"addiu %[temp8], %[temp8], 4 \n\t"
"addiu %[temp12], %[temp12], 4 \n\t"
"addiu %[temp0], %[temp0], 4 \n\t"
"addiu %[temp4], %[temp4], 4 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp15], %[temp15], 16 \n\t"
"sra %[temp11], %[temp11], 16 \n\t"
"sra %[temp7], %[temp7], 16 \n\t"
"subu %[temp17], %[temp11], %[temp17] \n\t"
"addu %[temp7], %[temp7], %[temp15] \n\t"
"addu %[temp15], %[temp3], %[temp17] \n\t"
"subu %[temp3], %[temp3], %[temp17] \n\t"
"addu %[temp11], %[temp16], %[temp7] \n\t"
"subu %[temp7], %[temp16], %[temp7] \n\t"
"addu %[temp16], %[temp8], %[temp10] \n\t"
"subu %[temp8], %[temp8], %[temp10] \n\t"
"mul %[temp10], %[temp9], %[kC2] \n\t"
"mul %[temp17], %[temp11], %[kC1] \n\t"
"mul %[temp9], %[temp9], %[kC1] \n\t"
"mul %[temp11], %[temp11], %[kC2] \n\t"
"sra %[temp10], %[temp10], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp11], %[temp11], 16 \n\t"
"subu %[temp17], %[temp10], %[temp17] \n\t"
"addu %[temp11], %[temp9], %[temp11] \n\t"
"addu %[temp10], %[temp12], %[temp14] \n\t"
"subu %[temp12], %[temp12], %[temp14] \n\t"
"mul %[temp14], %[temp13], %[kC2] \n\t"
"mul %[temp9], %[temp15], %[kC1] \n\t"
"mul %[temp13], %[temp13], %[kC1] \n\t"
"mul %[temp15], %[temp15], %[kC2] \n\t"
"sra %[temp14], %[temp14], 16 \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"sra %[temp15], %[temp15], 16 \n\t"
"subu %[temp9], %[temp14], %[temp9] \n\t"
"addu %[temp15], %[temp13], %[temp15] \n\t"
"addu %[temp14], %[temp0], %[temp2] \n\t"
"subu %[temp0], %[temp0], %[temp2] \n\t"
"mul %[temp2], %[temp1], %[kC2] \n\t"
"mul %[temp13], %[temp3], %[kC1] \n\t"
"mul %[temp1], %[temp1], %[kC1] \n\t"
"mul %[temp3], %[temp3], %[kC2] \n\t"
"sra %[temp2], %[temp2], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"sra %[temp1], %[temp1], 16 \n\t"
"sra %[temp3], %[temp3], 16 \n\t"
"subu %[temp13], %[temp2], %[temp13] \n\t"
"addu %[temp3], %[temp1], %[temp3] \n\t"
"addu %[temp2], %[temp4], %[temp6] \n\t"
"subu %[temp4], %[temp4], %[temp6] \n\t"
"mul %[temp6], %[temp5], %[kC2] \n\t"
"mul %[temp1], %[temp7], %[kC1] \n\t"
"mul %[temp5], %[temp5], %[kC1] \n\t"
"mul %[temp7], %[temp7], %[kC2] \n\t"
"sra %[temp6], %[temp6], 16 \n\t"
"sra %[temp1], %[temp1], 16 \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
"sra %[temp7], %[temp7], 16 \n\t"
"subu %[temp1], %[temp6], %[temp1] \n\t"
"addu %[temp7], %[temp5], %[temp7] \n\t"
"addu %[temp5], %[temp16], %[temp11] \n\t"
"subu %[temp16], %[temp16], %[temp11] \n\t"
"addu %[temp11], %[temp8], %[temp17] \n\t"
"subu %[temp8], %[temp8], %[temp17] \n\t"
"sra %[temp5], %[temp5], 3 \n\t"
"sra %[temp16], %[temp16], 3 \n\t"
"sra %[temp11], %[temp11], 3 \n\t"
"sra %[temp8], %[temp8], 3 \n\t"
"addu %[temp17], %[temp10], %[temp15] \n\t"
"subu %[temp10], %[temp10], %[temp15] \n\t"
"addu %[temp15], %[temp12], %[temp9] \n\t"
"subu %[temp12], %[temp12], %[temp9] \n\t"
"sra %[temp17], %[temp17], 3 \n\t"
"sra %[temp10], %[temp10], 3 \n\t"
"sra %[temp15], %[temp15], 3 \n\t"
"sra %[temp12], %[temp12], 3 \n\t"
"addu %[temp9], %[temp14], %[temp3] \n\t"
"subu %[temp14], %[temp14], %[temp3] \n\t"
"addu %[temp3], %[temp0], %[temp13] \n\t"
"subu %[temp0], %[temp0], %[temp13] \n\t"
"sra %[temp9], %[temp9], 3 \n\t"
"sra %[temp14], %[temp14], 3 \n\t"
"sra %[temp3], %[temp3], 3 \n\t"
"sra %[temp0], %[temp0], 3 \n\t"
"addu %[temp13], %[temp2], %[temp7] \n\t"
"subu %[temp2], %[temp2], %[temp7] \n\t"
"addu %[temp7], %[temp4], %[temp1] \n\t"
"subu %[temp4], %[temp4], %[temp1] \n\t"
"sra %[temp13], %[temp13], 3 \n\t"
"sra %[temp2], %[temp2], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"sra %[temp4], %[temp4], 3 \n\t"
"addiu %[temp6], $zero, 255 \n\t"
"lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"sra %[temp5], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
"beqz %[temp5], 1f \n\t"
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"1: \n\t"
"lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp11] \n\t"
"sra %[temp11], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
"beqz %[temp11], 2f \n\t"
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"2: \n\t"
"lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp8] \n\t"
"sra %[temp8], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
"beqz %[temp8], 3f \n\t"
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"3: \n\t"
"lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp16] \n\t"
"sra %[temp16], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
"beqz %[temp16], 4f \n\t"
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"4: \n\t"
"sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp17] \n\t"
"addu %[temp8], %[temp8], %[temp15] \n\t"
"addu %[temp11], %[temp11], %[temp12] \n\t"
"addu %[temp16], %[temp16], %[temp10] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"beqz %[temp18], 5f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"5: \n\t"
"sra %[temp18], %[temp8], 8 \n\t"
"sra %[temp1], %[temp8], 31 \n\t"
"beqz %[temp18], 6f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp1] \n\t"
"6: \n\t"
"sra %[temp18], %[temp11], 8 \n\t"
"sra %[temp1], %[temp11], 31 \n\t"
"sra %[temp17], %[temp16], 8 \n\t"
"sra %[temp15], %[temp16], 31 \n\t"
"beqz %[temp18], 7f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp1] \n\t"
"7: \n\t"
"beqz %[temp17], 8f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp15] \n\t"
"8: \n\t"
"sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp9] \n\t"
"addu %[temp8], %[temp8], %[temp3] \n\t"
"addu %[temp11], %[temp11], %[temp0] \n\t"
"addu %[temp16], %[temp16], %[temp14] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"sra %[temp17], %[temp8], 8 \n\t"
"sra %[temp15], %[temp8], 31 \n\t"
"sra %[temp12], %[temp11], 8 \n\t"
"sra %[temp10], %[temp11], 31 \n\t"
"sra %[temp9], %[temp16], 8 \n\t"
"sra %[temp3], %[temp16], 31 \n\t"
"beqz %[temp18], 9f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"9: \n\t"
"beqz %[temp17], 10f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp15] \n\t"
"10: \n\t"
"beqz %[temp12], 11f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp10] \n\t"
"11: \n\t"
"beqz %[temp9], 12f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"12: \n\t"
"sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp13] \n\t"
"addu %[temp8], %[temp8], %[temp7] \n\t"
"addu %[temp11], %[temp11], %[temp4] \n\t"
"addu %[temp16], %[temp16], %[temp2] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"sra %[temp17], %[temp8], 8 \n\t"
"sra %[temp15], %[temp8], 31 \n\t"
"sra %[temp12], %[temp11], 8 \n\t"
"sra %[temp10], %[temp11], 31 \n\t"
"sra %[temp9], %[temp16], 8 \n\t"
"sra %[temp3], %[temp16], 31 \n\t"
"beqz %[temp18], 13f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"13: \n\t"
"beqz %[temp17], 14f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp15] \n\t"
"14: \n\t"
"beqz %[temp12], 15f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp10] \n\t"
"15: \n\t"
"beqz %[temp9], 16f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"16: \n\t"
"sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
[temp18]"=&r"(temp18)
: [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
: "memory", "hi", "lo"
);
}
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
VP8InitClipTables();
VP8Transform = TransformTwo;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
}
#else // !WEBP_USE_MIPS32
WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
#endif // WEBP_USE_MIPS32
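A minimal sketch of how an architecture-specific init such as VP8DspInitMIPS32() above is typically selected at run time: the generic VP8DspInit() (defined in dec.c, which is not part of this excerpt) installs the portable C implementations first and then, guarded by VP8GetCPUInfo (declared in the dsp.h hunk further down), rebinds the function pointers to the fastest variant the CPU reports. The helper name below is illustrative only, not libwebp's.
#include "./dsp.h"  // sketch assumes this header for VP8GetCPUInfo / kMIPS32
extern void VP8DspInitMIPS32(void);
static void InitDecoderDspForThisCpu(void) {  // hypothetical helper
  // Portable C defaults are assumed to be installed already (elided here).
  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kMIPS32)) {
    VP8DspInitMIPS32();  // rebinds VP8Transform, VP8VFilter16, ... (see above)
  }
}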

View File

@ -1,994 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of dsp functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "./mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
#define MUL(a, b) (((a) * (b)) >> 16)
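// Note: kC1 == 20091 + (1 << 16) and kC2 == 35468 are the VP8 inverse
// transform constants in 16.16 fixed point, approximately
// sqrt(2) * cos(pi / 8) (~1.30656) and sqrt(2) * sin(pi / 8) (~0.54120).
// MUL() keeps the integer part of the product, e.g.
// MUL(1000, kC2) == (1000 * 35468) >> 16 == 541.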
static void TransformDC(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
__asm__ volatile (
LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
"lh %[temp5], 0(%[in]) \n\t"
"addiu %[temp5], %[temp5], 4 \n\t"
"ins %[temp5], %[temp5], 16, 16 \n\t"
"shra.ph %[temp5], %[temp5], 3 \n\t"
CONVERT_2_BYTES_TO_HALF(temp6, temp7, temp8, temp9, temp10, temp1, temp2,
temp3, temp1, temp2, temp3, temp4)
STORE_SAT_SUM_X2(temp6, temp7, temp8, temp9, temp10, temp1, temp2, temp3,
temp5, temp5, temp5, temp5, temp5, temp5, temp5, temp5,
dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_10()
: [in]"r"(in), [dst]"r"(dst)
: "memory"
);
}
static void TransformAC3(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
int c4 = MUL(in[4], kC2);
const int d4 = MUL(in[4], kC1);
const int c1 = MUL(in[1], kC2);
const int d1 = MUL(in[1], kC1);
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
"ins %[c4], %[d4], 16, 16 \n\t"
"replv.ph %[temp1], %[a] \n\t"
"replv.ph %[temp4], %[d1] \n\t"
ADD_SUB_HALVES(temp2, temp3, temp1, c4)
"replv.ph %[temp5], %[c1] \n\t"
SHIFT_R_SUM_X2(temp1, temp6, temp7, temp8, temp2, temp9, temp10, temp4,
temp2, temp2, temp3, temp3, temp4, temp5, temp4, temp5)
LOAD_WITH_OFFSET_X4(temp3, temp5, temp11, temp12, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
CONVERT_2_BYTES_TO_HALF(temp13, temp14, temp3, temp15, temp5, temp16,
temp11, temp17, temp3, temp5, temp11, temp12)
PACK_2_HALVES_TO_WORD(temp12, temp18, temp7, temp6, temp1, temp8, temp2,
temp4, temp7, temp6, temp10, temp9)
STORE_SAT_SUM_X2(temp13, temp14, temp3, temp15, temp5, temp16, temp11,
temp17, temp12, temp18, temp1, temp8, temp2, temp4,
temp7, temp6, dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_18(),
[c4]"+&r"(c4)
: [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
: "memory"
);
}
static void TransformOne(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
"ulw %[temp1], 0(%[in]) \n\t"
"ulw %[temp2], 16(%[in]) \n\t"
LOAD_IN_X2(temp5, temp6, 24, 26)
ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
LOAD_IN_X2(temp1, temp2, 8, 10)
MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
temp13, temp11, temp14, temp12)
INSERT_HALF_X2(temp8, temp7, temp10, temp9)
"ulw %[temp17], 4(%[in]) \n\t"
"ulw %[temp18], 20(%[in]) \n\t"
ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
LOAD_IN_X2(temp17, temp18, 12, 14)
LOAD_IN_X2(temp9, temp10, 28, 30)
MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
temp15, temp4, temp16, temp17)
INSERT_HALF_X2(temp11, temp12, temp13, temp14)
ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
// horizontal
SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
INSERT_HALF_X2(temp1, temp6, temp5, temp2)
SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
"repl.ph %[temp2], 0x4 \n\t"
INSERT_HALF_X2(temp3, temp8, temp17, temp4)
"addq.ph %[temp1], %[temp1], %[temp2] \n\t"
"addq.ph %[temp6], %[temp6], %[temp2] \n\t"
ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
temp6, temp17, temp8, temp18)
MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
temp18, temp12, temp17, temp16)
INSERT_HALF_X2(temp1, temp3, temp9, temp13)
INSERT_HALF_X2(temp6, temp8, temp11, temp15)
SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
temp6)
PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
temp16, temp11, temp10, temp15, temp14)
LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
temp11, temp10, temp11, temp14, temp15)
STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_18()
: [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2)
: "memory", "hi", "lo"
);
}
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"negu %[temp1], %[hstride] \n\t"
"addiu %[size], %[size], -1 \n\t"
"sll %[temp2], %[hstride], 1 \n\t"
"sll %[temp3], %[temp1], 1 \n\t"
"addu %[temp4], %[temp2], %[hstride] \n\t"
"addu %[temp5], %[temp3], %[temp1] \n\t"
"lbu %[temp7], 0(%[p]) \n\t"
"sll %[temp6], %[temp3], 1 \n\t"
"lbux %[temp8], %[temp5](%[p]) \n\t"
"lbux %[temp9], %[temp3](%[p]) \n\t"
"lbux %[temp10], %[temp1](%[p]) \n\t"
"lbux %[temp11], %[temp6](%[p]) \n\t"
"lbux %[temp12], %[hstride](%[p]) \n\t"
"lbux %[temp13], %[temp2](%[p]) \n\t"
"lbux %[temp14], %[temp4](%[p]) \n\t"
"subu %[temp1], %[temp10], %[temp7] \n\t"
"subu %[temp2], %[temp9], %[temp12] \n\t"
"absq_s.w %[temp3], %[temp1] \n\t"
"absq_s.w %[temp4], %[temp2] \n\t"
"negu %[temp1], %[temp1] \n\t"
"sll %[temp3], %[temp3], 2 \n\t"
"addu %[temp15], %[temp3], %[temp4] \n\t"
"subu %[temp3], %[temp15], %[thresh2] \n\t"
"sll %[temp6], %[temp1], 1 \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp4], %[temp11], %[temp8] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"shll_s.w %[temp2], %[temp2], 24 \n\t"
"subu %[temp4], %[temp4], %[ithresh] \n\t"
"bgtz %[temp4], 3f \n\t"
" subu %[temp3], %[temp8], %[temp9] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp5], %[temp9], %[temp10] \n\t"
"absq_s.w %[temp3], %[temp5] \n\t"
"absq_s.w %[temp5], %[temp5] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp3], %[temp14], %[temp13] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"slt %[temp5], %[hev_thresh], %[temp5] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp3], %[temp13], %[temp12] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"sra %[temp4], %[temp2], 24 \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp15], %[temp12], %[temp7] \n\t"
"absq_s.w %[temp3], %[temp15] \n\t"
"absq_s.w %[temp15], %[temp15] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" slt %[temp15], %[hev_thresh], %[temp15] \n\t"
"addu %[temp3], %[temp6], %[temp1] \n\t"
"or %[temp2], %[temp5], %[temp15] \n\t"
"addu %[temp5], %[temp4], %[temp3] \n\t"
"beqz %[temp2], 4f \n\t"
" shra_r.w %[temp1], %[temp5], 3 \n\t"
"addiu %[temp2], %[temp5], 3 \n\t"
"sra %[temp2], %[temp2], 3 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"subu %[temp3], %[p], %[hstride] \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"subu %[temp1], %[temp7], %[temp1] \n\t"
"addu %[temp2], %[temp10], %[temp2] \n\t"
"lbux %[temp2], %[temp2](%[VP8kclip1]) \n\t"
"lbux %[temp1], %[temp1](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[temp3]) \n\t"
"j 3f \n\t"
" sb %[temp1], 0(%[p]) \n\t"
"4: \n\t"
"shll_s.w %[temp5], %[temp5], 24 \n\t"
"subu %[temp14], %[p], %[hstride] \n\t"
"subu %[temp11], %[temp14], %[hstride] \n\t"
"sra %[temp6], %[temp5], 24 \n\t"
"sll %[temp1], %[temp6], 3 \n\t"
"subu %[temp15], %[temp11], %[hstride] \n\t"
"addu %[temp2], %[temp6], %[temp1] \n\t"
"sll %[temp3], %[temp2], 1 \n\t"
"addu %[temp4], %[temp3], %[temp2] \n\t"
"addiu %[temp2], %[temp2], 63 \n\t"
"addiu %[temp3], %[temp3], 63 \n\t"
"addiu %[temp4], %[temp4], 63 \n\t"
"sra %[temp2], %[temp2], 7 \n\t"
"sra %[temp3], %[temp3], 7 \n\t"
"sra %[temp4], %[temp4], 7 \n\t"
"addu %[temp1], %[temp8], %[temp2] \n\t"
"addu %[temp5], %[temp9], %[temp3] \n\t"
"addu %[temp6], %[temp10], %[temp4] \n\t"
"subu %[temp8], %[temp7], %[temp4] \n\t"
"subu %[temp7], %[temp12], %[temp3] \n\t"
"addu %[temp10], %[p], %[hstride] \n\t"
"subu %[temp9], %[temp13], %[temp2] \n\t"
"addu %[temp12], %[temp10], %[hstride] \n\t"
"lbux %[temp2], %[temp1](%[VP8kclip1]) \n\t"
"lbux %[temp3], %[temp5](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp6](%[VP8kclip1]) \n\t"
"lbux %[temp5], %[temp8](%[VP8kclip1]) \n\t"
"lbux %[temp6], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp8], %[temp9](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[temp15]) \n\t"
"sb %[temp3], 0(%[temp11]) \n\t"
"sb %[temp4], 0(%[temp14]) \n\t"
"sb %[temp5], 0(%[p]) \n\t"
"sb %[temp6], 0(%[temp10]) \n\t"
"sb %[temp8], 0(%[temp12]) \n\t"
"3: \n\t"
"bgtz %[size], 1b \n\t"
" addu %[p], %[p], %[vstride] \n\t"
".set pop \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),[temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7),[temp8]"=&r"(temp8),[temp9]"=&r"(temp9),
[temp10]"=&r"(temp10),[temp11]"=&r"(temp11),[temp12]"=&r"(temp12),
[temp13]"=&r"(temp13),[temp14]"=&r"(temp14),[temp15]"=&r"(temp15),
[size]"+&r"(size), [p]"+&r"(p)
: [hstride]"r"(hstride), [thresh2]"r"(thresh2),
[ithresh]"r"(ithresh),[vstride]"r"(vstride), [hev_thresh]"r"(hev_thresh),
[VP8kclip1]"r"(VP8kclip1)
: "memory"
);
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
int p0, q0, p1, q1, p2, q2, p3, q3;
int step1, step2, temp1, temp2, temp3, temp4;
uint8_t* pTemp0;
uint8_t* pTemp1;
const int thresh2 = 2 * thresh + 1;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"bltz %[size], 3f \n\t"
" nop \n\t"
"2: \n\t"
"negu %[step1], %[hstride] \n\t"
"lbu %[q0], 0(%[p]) \n\t"
"lbux %[p0], %[step1](%[p]) \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"lbux %[q1], %[hstride](%[p]) \n\t"
"subu %[temp1], %[p0], %[q0] \n\t"
"lbux %[p1], %[step1](%[p]) \n\t"
"addu %[step2], %[hstride], %[hstride] \n\t"
"absq_s.w %[temp2], %[temp1] \n\t"
"subu %[temp3], %[p1], %[q1] \n\t"
"absq_s.w %[temp4], %[temp3] \n\t"
"sll %[temp2], %[temp2], 2 \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"subu %[temp4], %[temp2], %[thresh2] \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"bgtz %[temp4], 0f \n\t"
" lbux %[p2], %[step1](%[p]) \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"lbux %[q2], %[step2](%[p]) \n\t"
"lbux %[p3], %[step1](%[p]) \n\t"
"subu %[temp4], %[p2], %[p1] \n\t"
"addu %[step2], %[step2], %[hstride] \n\t"
"subu %[temp2], %[p3], %[p2] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"absq_s.w %[temp2], %[temp2] \n\t"
"lbux %[q3], %[step2](%[p]) \n\t"
"subu %[temp4], %[temp4], %[ithresh] \n\t"
"negu %[temp1], %[temp1] \n\t"
"bgtz %[temp4], 0f \n\t"
" subu %[temp2], %[temp2], %[ithresh] \n\t"
"subu %[p3], %[p1], %[p0] \n\t"
"bgtz %[temp2], 0f \n\t"
" absq_s.w %[p3], %[p3] \n\t"
"subu %[temp4], %[q3], %[q2] \n\t"
"subu %[pTemp0], %[p], %[hstride] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"subu %[temp2], %[p3], %[ithresh] \n\t"
"sll %[step1], %[temp1], 1 \n\t"
"bgtz %[temp2], 0f \n\t"
" subu %[temp4], %[temp4], %[ithresh] \n\t"
"subu %[temp2], %[q2], %[q1] \n\t"
"bgtz %[temp4], 0f \n\t"
" absq_s.w %[temp2], %[temp2] \n\t"
"subu %[q3], %[q1], %[q0] \n\t"
"absq_s.w %[q3], %[q3] \n\t"
"subu %[temp2], %[temp2], %[ithresh] \n\t"
"addu %[temp1], %[temp1], %[step1] \n\t"
"bgtz %[temp2], 0f \n\t"
" subu %[temp4], %[q3], %[ithresh] \n\t"
"slt %[p3], %[hev_thresh], %[p3] \n\t"
"bgtz %[temp4], 0f \n\t"
" slt %[q3], %[hev_thresh], %[q3] \n\t"
"or %[q3], %[q3], %[p3] \n\t"
"bgtz %[q3], 1f \n\t"
" shra_r.w %[temp2], %[temp1], 3 \n\t"
"addiu %[temp1], %[temp1], 3 \n\t"
"sra %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"addu %[pTemp1], %[p], %[hstride] \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"addiu %[step1], %[temp2], 1 \n\t"
"sra %[step1], %[step1], 1 \n\t"
"addu %[p0], %[p0], %[temp1] \n\t"
"addu %[p1], %[p1], %[step1] \n\t"
"subu %[q0], %[q0], %[temp2] \n\t"
"subu %[q1], %[q1], %[step1] \n\t"
"lbux %[temp2], %[p0](%[VP8kclip1]) \n\t"
"lbux %[temp3], %[q0](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[q1](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[pTemp0]) \n\t"
"lbux %[temp1], %[p1](%[VP8kclip1]) \n\t"
"subu %[pTemp0], %[pTemp0], %[hstride] \n\t"
"sb %[temp3], 0(%[p]) \n\t"
"sb %[temp4], 0(%[pTemp1]) \n\t"
"j 0f \n\t"
" sb %[temp1], 0(%[pTemp0]) \n\t"
"1: \n\t"
"shll_s.w %[temp3], %[temp3], 24 \n\t"
"sra %[temp3], %[temp3], 24 \n\t"
"addu %[temp1], %[temp1], %[temp3] \n\t"
"shra_r.w %[temp2], %[temp1], 3 \n\t"
"addiu %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"addu %[p0], %[p0], %[temp1] \n\t"
"subu %[q0], %[q0], %[temp2] \n\t"
"lbux %[temp1], %[p0](%[VP8kclip1]) \n\t"
"lbux %[temp2], %[q0](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[p]) \n\t"
"sb %[temp1], 0(%[pTemp0]) \n\t"
"0: \n\t"
"subu %[size], %[size], 1 \n\t"
"bgtz %[size], 2b \n\t"
" addu %[p], %[p], %[vstride] \n\t"
"3: \n\t"
".set pop \n\t"
: [p0]"=&r"(p0), [q0]"=&r"(q0), [p1]"=&r"(p1), [q1]"=&r"(q1),
[p2]"=&r"(p2), [q2]"=&r"(q2), [p3]"=&r"(p3), [q3]"=&r"(q3),
[step2]"=&r"(step2), [step1]"=&r"(step1), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
[pTemp0]"=&r"(pTemp0), [pTemp1]"=&r"(pTemp1), [p]"+&r"(p),
[size]"+&r"(size)
: [vstride]"r"(vstride), [ithresh]"r"(ithresh),
[hev_thresh]"r"(hev_thresh), [hstride]"r"(hstride),
[VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#undef MUL
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
uint8_t* p1 = p - stride;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"li %[i], 16 \n\t"
"0: \n\t"
"negu %[temp4], %[stride] \n\t"
"sll %[temp5], %[temp4], 1 \n\t"
"lbu %[temp2], 0(%[p]) \n\t"
"lbux %[temp3], %[stride](%[p]) \n\t"
"lbux %[temp1], %[temp4](%[p]) \n\t"
"lbux %[temp0], %[temp5](%[p]) \n\t"
"subu %[temp7], %[temp1], %[temp2] \n\t"
"subu %[temp6], %[temp0], %[temp3] \n\t"
"absq_s.w %[temp4], %[temp7] \n\t"
"absq_s.w %[temp5], %[temp6] \n\t"
"sll %[temp4], %[temp4], 2 \n\t"
"subu %[temp5], %[temp5], %[thresh2] \n\t"
"addu %[temp5], %[temp4], %[temp5] \n\t"
"negu %[temp8], %[temp7] \n\t"
"bgtz %[temp5], 1f \n\t"
" addiu %[i], %[i], -1 \n\t"
"sll %[temp4], %[temp8], 1 \n\t"
"shll_s.w %[temp5], %[temp6], 24 \n\t"
"addu %[temp3], %[temp4], %[temp8] \n\t"
"sra %[temp5], %[temp5], 24 \n\t"
"addu %[temp3], %[temp3], %[temp5] \n\t"
"addiu %[temp7], %[temp3], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"shra_r.w %[temp8], %[temp3], 3 \n\t"
"shll_s.w %[temp0], %[temp7], 27 \n\t"
"shll_s.w %[temp4], %[temp8], 27 \n\t"
"sra %[temp0], %[temp0], 27 \n\t"
"sra %[temp4], %[temp4], 27 \n\t"
"addu %[temp7], %[temp1], %[temp0] \n\t"
"subu %[temp2], %[temp2], %[temp4] \n\t"
"lbux %[temp3], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp2](%[VP8kclip1]) \n\t"
"sb %[temp3], 0(%[p1]) \n\t"
"sb %[temp4], 0(%[p]) \n\t"
"1: \n\t"
"addiu %[p1], %[p1], 1 \n\t"
"bgtz %[i], 0b \n\t"
" addiu %[p], %[p], 1 \n\t"
" .set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[p]"+&r"(p), [i]"=&r"(i), [p1]"+&r"(p1)
: [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
// TEMP0 = SRC[A + A1 * BPS]
// TEMP1 = SRC[B + B1 * BPS]
// TEMP2 = SRC[C + C1 * BPS]
// TEMP3 = SRC[D + D1 * BPS]
#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
A, A1, B, B1, C, C1, D, D1, SRC) \
"lbu %[" #TEMP0 "], " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP1 "], " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP2 "], " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP3 "], " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"li %[i], 16 \n\t"
"0: \n\t"
LOAD_4_BYTES(temp0, temp1, temp2, temp3, -2, 0, -1, 0, 0, 0, 1, 0, p)
"subu %[temp7], %[temp1], %[temp2] \n\t"
"subu %[temp6], %[temp0], %[temp3] \n\t"
"absq_s.w %[temp4], %[temp7] \n\t"
"absq_s.w %[temp5], %[temp6] \n\t"
"sll %[temp4], %[temp4], 2 \n\t"
"addu %[temp5], %[temp4], %[temp5] \n\t"
"subu %[temp5], %[temp5], %[thresh2] \n\t"
"negu %[temp8], %[temp7] \n\t"
"bgtz %[temp5], 1f \n\t"
" addiu %[i], %[i], -1 \n\t"
"sll %[temp4], %[temp8], 1 \n\t"
"shll_s.w %[temp5], %[temp6], 24 \n\t"
"addu %[temp3], %[temp4], %[temp8] \n\t"
"sra %[temp5], %[temp5], 24 \n\t"
"addu %[temp3], %[temp3], %[temp5] \n\t"
"addiu %[temp7], %[temp3], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"shra_r.w %[temp8], %[temp3], 3 \n\t"
"shll_s.w %[temp0], %[temp7], 27 \n\t"
"shll_s.w %[temp4], %[temp8], 27 \n\t"
"sra %[temp0], %[temp0], 27 \n\t"
"sra %[temp4], %[temp4], 27 \n\t"
"addu %[temp7], %[temp1], %[temp0] \n\t"
"subu %[temp2], %[temp2], %[temp4] \n\t"
"lbux %[temp3], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp2](%[VP8kclip1]) \n\t"
"sb %[temp3], -1(%[p]) \n\t"
"sb %[temp4], 0(%[p]) \n\t"
"1: \n\t"
"bgtz %[i], 0b \n\t"
" addu %[p], %[p], %[stride] \n\t"
".set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[p]"+&r"(p), [i]"=&r"(i)
: [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
}
}
// DST[A * BPS] = TEMP0
// DST[B + C * BPS] = TEMP1
#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
"usw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #DST "]) \n\t" \
"usw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #DST "]) \n\t"
static void VE4(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
__asm__ volatile (
"ulw %[temp0], -1(%[top]) \n\t"
"ulh %[temp1], 3(%[top]) \n\t"
"preceu.ph.qbr %[temp2], %[temp0] \n\t"
"preceu.ph.qbl %[temp3], %[temp0] \n\t"
"preceu.ph.qbr %[temp4], %[temp1] \n\t"
"packrl.ph %[temp5], %[temp3], %[temp2] \n\t"
"packrl.ph %[temp6], %[temp4], %[temp3] \n\t"
"shll.ph %[temp5], %[temp5], 1 \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp2], %[temp5], %[temp2] \n\t"
"addq.ph %[temp6], %[temp6], %[temp4] \n\t"
"addq.ph %[temp2], %[temp2], %[temp3] \n\t"
"addq.ph %[temp6], %[temp6], %[temp3] \n\t"
"shra_r.ph %[temp2], %[temp2], 2 \n\t"
"shra_r.ph %[temp6], %[temp6], 2 \n\t"
"precr.qb.ph %[temp4], %[temp6], %[temp2] \n\t"
STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)
STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6)
: [top]"r"(top), [dst]"r"(dst)
: "memory"
);
}
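// In scalar terms, the vertical predictor above smooths the row just on top
// of the 4x4 block, out[i] == (top[i - 1] + 2 * top[i] + top[i + 1] + 2) >> 2
// for i == 0..3 (the rounded shifts "shra_r.ph ..., 2" provide the
// "+ 2) >> 2" part), and the same 4 bytes are written to all 4 rows of dst.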
static void DC4(uint8_t* dst) { // DC
int temp0, temp1, temp2, temp3, temp4;
__asm__ volatile (
"ulw %[temp0], -1*" XSTR(BPS) "(%[dst]) \n\t"
LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ins %[temp1], %[temp2], 8, 8 \n\t"
"ins %[temp1], %[temp3], 16, 8 \n\t"
"ins %[temp1], %[temp4], 24, 8 \n\t"
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
: [dst]"r"(dst)
: "memory"
);
}
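// In scalar terms, DC4 above sums the 4 samples on top and the 4 samples on
// the left, rounds with dc == (sum + 4) >> 3 (the "shra_r.w ..., 3"), and
// replicates that value over the whole 4x4 block.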
static void RD4(uint8_t* dst) { // Down-right
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8;
__asm__ volatile (
LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ulw %[temp7], -1-" XSTR(BPS) "(%[dst]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"preceu.ph.qbr %[temp5], %[temp7] \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"preceu.ph.qbl %[temp4], %[temp7] \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"shll.ph %[temp2], %[temp2], 1 \n\t"
"addq.ph %[temp3], %[temp3], %[temp1] \n\t"
"packrl.ph %[temp6], %[temp5], %[temp1] \n\t"
"addq.ph %[temp3], %[temp3], %[temp2] \n\t"
"addq.ph %[temp1], %[temp1], %[temp5] \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp1], %[temp1], %[temp6] \n\t"
"packrl.ph %[temp0], %[temp4], %[temp5] \n\t"
"addq.ph %[temp8], %[temp5], %[temp4] \n\t"
"shra_r.ph %[temp3], %[temp3], 2 \n\t"
"shll.ph %[temp0], %[temp0], 1 \n\t"
"shra_r.ph %[temp1], %[temp1], 2 \n\t"
"addq.ph %[temp8], %[temp0], %[temp8] \n\t"
"lbu %[temp5], 3-" XSTR(BPS) "(%[dst]) \n\t"
"precrq.ph.w %[temp7], %[temp7], %[temp7] \n\t"
"shra_r.ph %[temp8], %[temp8], 2 \n\t"
"ins %[temp7], %[temp5], 0, 8 \n\t"
"precr.qb.ph %[temp2], %[temp1], %[temp3] \n\t"
"raddu.w.qb %[temp4], %[temp7] \n\t"
"precr.qb.ph %[temp6], %[temp8], %[temp1] \n\t"
"shra_r.w %[temp4], %[temp4], 2 \n\t"
STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)
"prepend %[temp2], %[temp8], 8 \n\t"
"prepend %[temp6], %[temp4], 8 \n\t"
STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [dst]"r"(dst)
: "memory"
);
}
// TEMP0 = SRC[A * BPS]
// TEMP1 = SRC[B + C * BPS]
#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
"ulw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"ulw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "]) \n\t"
static void LD4(uint8_t* dst) { // Down-Left
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
"preceu.ph.qbl %[temp2], %[temp0] \n\t"
"preceu.ph.qbr %[temp3], %[temp0] \n\t"
"preceu.ph.qbr %[temp4], %[temp1] \n\t"
"preceu.ph.qbl %[temp5], %[temp1] \n\t"
"packrl.ph %[temp6], %[temp2], %[temp3] \n\t"
"packrl.ph %[temp7], %[temp4], %[temp2] \n\t"
"packrl.ph %[temp8], %[temp5], %[temp4] \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp9], %[temp2], %[temp6] \n\t"
"shll.ph %[temp7], %[temp7], 1 \n\t"
"addq.ph %[temp9], %[temp9], %[temp3] \n\t"
"shll.ph %[temp8], %[temp8], 1 \n\t"
"shra_r.ph %[temp9], %[temp9], 2 \n\t"
"addq.ph %[temp3], %[temp4], %[temp7] \n\t"
"addq.ph %[temp0], %[temp5], %[temp8] \n\t"
"addq.ph %[temp3], %[temp3], %[temp2] \n\t"
"addq.ph %[temp0], %[temp0], %[temp4] \n\t"
"shra_r.ph %[temp3], %[temp3], 2 \n\t"
"shra_r.ph %[temp0], %[temp0], 2 \n\t"
"srl %[temp1], %[temp1], 24 \n\t"
"sll %[temp1], %[temp1], 1 \n\t"
"raddu.w.qb %[temp5], %[temp5] \n\t"
"precr.qb.ph %[temp9], %[temp3], %[temp9] \n\t"
"precr.qb.ph %[temp3], %[temp0], %[temp3] \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"shra_r.w %[temp1], %[temp1], 2 \n\t"
STORE_8_BYTES(temp9, temp3, 0, 0, 2, dst)
"prepend %[temp9], %[temp0], 8 \n\t"
"prepend %[temp3], %[temp1], 8 \n\t"
STORE_8_BYTES(temp9, temp3, 1, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9)
: [dst]"r"(dst)
: "memory"
);
}
//------------------------------------------------------------------------------
// Chroma
static void DC8uv(uint8_t* dst) { // DC
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addu %[temp4], %[temp4], %[temp5] \n\t"
"addu %[temp6], %[temp6], %[temp7] \n\t"
"addu %[temp8], %[temp8], %[temp9] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"addu %[temp6], %[temp6], %[temp8] \n\t"
"addu %[temp0], %[temp0], %[temp2] \n\t"
"addu %[temp0], %[temp0], %[temp6] \n\t"
"shra_r.w %[temp0], %[temp0], 4 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9)
: [dst]"r"(dst)
: "memory"
);
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
int temp0, temp1;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
: [dst]"r"(dst)
: "memory"
);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8;
__asm__ volatile (
LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addu %[temp4], %[temp4], %[temp5] \n\t"
"addu %[temp6], %[temp6], %[temp7] \n\t"
"addu %[temp8], %[temp8], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"addu %[temp6], %[temp6], %[temp8] \n\t"
"addu %[temp0], %[temp6], %[temp2] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [dst]"r"(dst)
: "memory"
);
}
#undef LOAD_8_BYTES
#undef STORE_8_BYTES
#undef LOAD_4_BYTES
#define CLIPPING(SIZE) \
"preceu.ph.qbl %[temp2], %[temp0] \n\t" \
"preceu.ph.qbr %[temp0], %[temp0] \n\t" \
".if " #SIZE " == 8 \n\t" \
"preceu.ph.qbl %[temp3], %[temp1] \n\t" \
"preceu.ph.qbr %[temp1], %[temp1] \n\t" \
".endif \n\t" \
"addu.ph %[temp2], %[temp2], %[dst_1] \n\t" \
"addu.ph %[temp0], %[temp0], %[dst_1] \n\t" \
".if " #SIZE " == 8 \n\t" \
"addu.ph %[temp3], %[temp3], %[dst_1] \n\t" \
"addu.ph %[temp1], %[temp1], %[dst_1] \n\t" \
".endif \n\t" \
"shll_s.ph %[temp2], %[temp2], 7 \n\t" \
"shll_s.ph %[temp0], %[temp0], 7 \n\t" \
".if " #SIZE " == 8 \n\t" \
"shll_s.ph %[temp3], %[temp3], 7 \n\t" \
"shll_s.ph %[temp1], %[temp1], 7 \n\t" \
".endif \n\t" \
"precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
".if " #SIZE " == 8 \n\t" \
"precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t" \
".endif \n\t"
#define CLIP_8B_TO_DST(DST, TOP, SIZE) do { \
int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; \
int temp0, temp1, temp2, temp3; \
__asm__ volatile ( \
".if " #SIZE " < 8 \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
CLIPPING(4) \
"usw %[temp0], 0(%[dst]) \n\t" \
".else \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"ulw %[temp1], 4(%[top]) \n\t" \
"subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
CLIPPING(8) \
"usw %[temp0], 0(%[dst]) \n\t" \
"usw %[temp1], 4(%[dst]) \n\t" \
".if " #SIZE " == 16 \n\t" \
"ulw %[temp0], 8(%[top]) \n\t" \
"ulw %[temp1], 12(%[top]) \n\t" \
CLIPPING(8) \
"usw %[temp0], 8(%[dst]) \n\t" \
"usw %[temp1], 12(%[dst]) \n\t" \
".endif \n\t" \
".endif \n\t" \
: [dst_1]"+&r"(dst_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
#define CLIP_TO_DST(DST, SIZE) do { \
int y; \
const uint8_t* top = (DST) - BPS; \
const int top_1 = ((int)top[-1] << 16) + top[-1]; \
for (y = 0; y < (SIZE); ++y) { \
CLIP_8B_TO_DST((DST), top, (SIZE)); \
(DST) += BPS; \
} \
} while (0)
#define TRUE_MOTION(DST, SIZE) \
static void TrueMotion##SIZE(uint8_t* (DST)) { \
CLIP_TO_DST((DST), (SIZE)); \
}
TRUE_MOTION(dst, 4)
TRUE_MOTION(dst, 8)
TRUE_MOTION(dst, 16)
#undef TRUE_MOTION
#undef CLIP_TO_DST
#undef CLIP_8B_TO_DST
#undef CLIPPING
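// A plain-C reference for what the CLIP_* macros above implement: VP8
// TrueMotion prediction, dst[x] == clip(left + top[x] - top_left), saturated
// to [0, 255].  This is only an illustrative sketch (roughly what the
// generic fallback in dec.c does); the names below are not from this file.
static WEBP_INLINE uint8_t ClipTo8b(int v) {
  return (v < 0) ? 0 : (v > 255) ? 255 : (uint8_t)v;
}
static void TrueMotionRef(uint8_t* dst, int size) {  // size is 4, 8 or 16
  const uint8_t* const top = dst - BPS;
  int x, y;
  for (y = 0; y < size; ++y) {
    const int base = dst[-1] - top[-1];  // left sample minus top-left sample
    for (x = 0; x < size; ++x) dst[x] = ClipTo8b(base + top[x]);
    dst += BPS;
  }
}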
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) {
VP8TransformDC = TransformDC;
VP8TransformAC3 = TransformAC3;
VP8Transform = TransformTwo;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
VP8PredLuma4[0] = DC4;
VP8PredLuma4[1] = TrueMotion4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[6] = LD4;
VP8PredChroma8[0] = DC8uv;
VP8PredChroma8[1] = TrueMotion8;
VP8PredChroma8[4] = DC8uvNoTop;
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredLuma16[1] = TrueMotion16;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8DspInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,45 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include "../dec/vp8i.h"
static void HE16(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {
const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4));
const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
_mm_storeu_si128((__m128i*)dst, values);
dst += BPS;
}
}
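// A rough plain-C equivalent of HE16 above, for reference: each of the 16
// rows is filled with its left neighbour dst[-1] (the generic fallback in
// dec.c does the same with memset).  The name HE16_C is illustrative only.
static void HE16_C(uint8_t* dst) {  // horizontal prediction, 16x16
  int i, j;
  for (j = 16; j > 0; --j) {
    for (i = 0; i < 16; ++i) dst[i] = dst[-1];
    dst += BPS;
  }
}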
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
VP8PredLuma16[3] = HE16;
}
#else // !WEBP_USE_SSE41
WEBP_DSP_INIT_STUB(VP8DspInitSSE41)
#endif // WEBP_USE_SSE41

View File

@ -15,114 +15,38 @@
#define WEBP_DSP_DSP_H_
#include "../webp/types.h"
#include "../utils/utils.h"
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define BPS 32 // this is the common stride for enc/dec
//------------------------------------------------------------------------------
// CPU detection
#if defined(__GNUC__)
# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
# define LOCAL_GCC_PREREQ(maj, min) \
(LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GCC_VERSION 0
# define LOCAL_GCC_PREREQ(maj, min) 0
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
// files without intrinsics, allowing the corresponding Init() to be called.
// Files containing intrinsics will need to be built targeting the instruction
// set, so they should succeed on one of the earlier tests.
#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
#if defined(__SSE2__) || defined(WEBP_MSC_SSE2)
#define WEBP_USE_SSE2
#endif
#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
#define WEBP_USE_SSE41
#endif
#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
#define WEBP_USE_AVX2
#endif
#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
#define WEBP_ANDROID_NEON // Android targets that might support NEON
#endif
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
defined(__aarch64__)) && !defined(__native_client__)
#if defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON)
#define WEBP_USE_NEON
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
#define WEBP_USE_NEON
#define WEBP_USE_INTRINSICS
#endif
#if defined(__mips__) && !defined(__mips64) && \
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
#define WEBP_USE_MIPS_DSP_R2
#endif
#endif
#endif
// This macro prevents thread_sanitizer from reporting known concurrent writes.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#undef WEBP_TSAN_IGNORE_FUNCTION
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
#endif
#endif
typedef enum {
kSSE2,
kSSE3,
kSSE4_1,
kAVX,
kAVX2,
kNEON,
kMIPS32,
kMIPSdspR2
kNEON
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Init stub generator
// Defines an init function stub to ensure each module exposes a symbol,
// avoiding a compiler warning.
#define WEBP_DSP_INIT_STUB(func) \
extern void func(void); \
WEBP_TSAN_IGNORE_FUNCTION void func(void) {}
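// For example, WEBP_DSP_INIT_STUB(VP8DspInitSSE41), as used at the end of
// dec_sse41.c above, expands to an empty definition:
//   extern void VP8DspInitSSE41(void);
//   WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {}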
extern VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Encoding
@ -136,7 +60,7 @@ typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8ITransformWHT;
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top and *left can be NULL.
@ -155,63 +79,21 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy16x8;
// Quantization
struct VP8Matrix; // forward declaration
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
const struct VP8Matrix* const mtx);
int n, const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
// specific to 2nd transform:
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
extern const int VP8DspScan[16 + 4 + 4];
// Collect histogram for susceptibility calculation.
#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
typedef struct {
// We only need to store max_value and last_non_zero, not the distribution.
int max_value;
int last_non_zero;
} VP8Histogram;
// Collect histogram for susceptibility calculation and accumulate in histo[].
struct VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo);
struct VP8Histogram* const histo);
extern const int VP8DspScan[16 + 4 + 4];
extern VP8CHisto VP8CollectHistogram;
// General-purpose util function to help VP8CollectHistogram().
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
VP8Histogram* const histo);
// must be called before using any of the above
void VP8EncDspInit(void);
//------------------------------------------------------------------------------
// cost functions (encoding)
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
extern const uint8_t VP8EncBands[16 + 1];
struct VP8Residual;
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
struct VP8Residual* const res);
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
// Cost calculation function.
typedef int (*VP8GetResidualCostFunc)(int ctx0,
const struct VP8Residual* const res);
extern VP8GetResidualCostFunc VP8GetResidualCost;
// must be called before anything using the above
void VP8EncDspCostInit(void);
void VP8EncDspInit(void); // must be called before using any of the above
//------------------------------------------------------------------------------
// Decoding
@ -220,7 +102,6 @@ typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
// when doing two transforms, coeffs is actually int16_t[2][16].
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformAC3;
extern VP8DecIdct VP8TransformUV;
extern VP8DecIdct VP8TransformDC;
extern VP8DecIdct VP8TransformDCUV;
@ -229,17 +110,9 @@ extern VP8WHT VP8TransformWHT;
// *dst is the destination block, with stride BPS. Boundary samples are
// assumed accessible when needed.
typedef void (*VP8PredFunc)(uint8_t* dst);
extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
// clipping tables (for filtering)
extern const int8_t* const VP8ksclip1; // clips [-1020, 1020] to [-128, 127]
extern const int8_t* const VP8ksclip2; // clips [-112, 112] to [-16, 15]
extern const uint8_t* const VP8kclip1; // clips [-255,511] to [0,255]
extern const uint8_t* const VP8kabs0; // abs(x) for x in [-255,255]
// must be called first
void VP8InitClipTables(void);
extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
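A hedged sketch of why the clip tables exist: once VP8InitClipTables() has run, clamping a predicted sample plus a residual to [0, 255] is a single lookup rather than two comparisons. The helper name is hypothetical; the index must stay inside the documented [-255, 511] range.

#include "./dsp.h"

static uint8_t ExampleClampToByte(int predicted, int residual) {
  return VP8kclip1[predicted + residual];  // table is offset to allow negatives
}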
// simple filter (only for luma)
typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
@ -273,8 +146,6 @@ void VP8DspInit(void);
#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
typedef void (*WebPUpsampleLinePairFunc)(
const uint8_t* top_y, const uint8_t* bottom_y,
const uint8_t* top_u, const uint8_t* top_v,
@ -286,20 +157,21 @@ typedef void (*WebPUpsampleLinePairFunc)(
// Fancy upsampling functions to convert YUV to RGB(A) modes
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
// Initializes SSE2 version of the fancy upsamplers.
void WebPInitUpsamplersSSE2(void);
// NEON version
void WebPInitUpsamplersNEON(void);
#endif // FANCY_UPSAMPLING
// Per-row point-sampling methods.
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
const uint8_t* u, const uint8_t* v, int uv_stride,
uint8_t* dst, int dst_stride,
int width, int height, WebPSamplerRowFunc func);
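A hedged sketch of what the per-plane helper amounts to: walk the rows, apply the row sampler, and advance the chroma planes every other row (4:2:0 subsampling). The helper name is hypothetical; the real entry point is WebPSamplerProcessPlane above.

#include "./dsp.h"

static void ExampleProcessPlane(const uint8_t* y, int y_stride,
                                const uint8_t* u, const uint8_t* v,
                                int uv_stride, uint8_t* dst, int dst_stride,
                                int width, int height, WebPSamplerRowFunc func) {
  int j;
  for (j = 0; j < height; ++j) {
    func(y, u, v, dst, width);     // convert one row of YUV to the output mode
    y += y_stride;
    if (j & 1) {                   // chroma advances every other luma row
      u += uv_stride;
      v += uv_stride;
    }
    dst += dst_stride;
  }
}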
// Point-sampling methods.
typedef void (*WebPSampleLinePairFunc)(
const uint8_t* top_y, const uint8_t* bottom_y,
const uint8_t* u, const uint8_t* v,
uint8_t* top_dst, uint8_t* bottom_dst, int len);
// Sampling functions to convert rows of YUV to RGB(A)
extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */];
// General function for converting two lines of ARGB or RGBA.
// 'alpha_is_last' should be true if 0xff000000 is stored in memory as
@ -311,84 +183,13 @@ typedef void (*WebPYUV444Converter)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
// Must be called before using the WebPUpsamplers[] (and for premultiplied
// colorspaces like rgbA, rgbA4444, etc)
// Main function to be called
void WebPInitUpsamplers(void);
// Must be called before using WebPSamplers[]
void WebPInitSamplers(void);
// Must be called before using WebPYUV444Converters[]
void WebPInitYUV444Converters(void);
//------------------------------------------------------------------------------
// ARGB -> YUV converters
// Convert ARGB samples to luma Y.
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
// even lines and '0' for odd ones. 'src_width' is the original width, not
// the U/V one.
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
int src_width, int do_store);
// Convert a row of accumulated rgba32 values (four values per pixel) to U/V
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
// Convert RGB or BGR to Y
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
// used for plain-C fallback.
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
int src_width, int do_store);
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
// Must be called before using the above.
void WebPInitConvertARGBToYUV(void);
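A hedged sketch of driving the converters above on one even/odd pair of rows, following the do_store rule from the comment ('1' on even lines, '0' on odd ones). WebPInitConvertARGBToYUV() is assumed to have been called once beforehand; the helper name is hypothetical.

#include "./dsp.h"

static void ExampleImportTwoRows(const uint32_t* argb_even,
                                 const uint32_t* argb_odd, int width,
                                 uint8_t* y_even, uint8_t* y_odd,
                                 uint8_t* u, uint8_t* v) {
  WebPConvertARGBToY(argb_even, y_even, width);
  WebPConvertARGBToY(argb_odd,  y_odd,  width);
  WebPConvertARGBToUV(argb_even, u, v, width, 1);  // even line: do_store = 1
  WebPConvertARGBToUV(argb_odd,  u, v, width, 0);  // odd line:  do_store = 0
}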
//------------------------------------------------------------------------------
// Rescaler
struct WebPRescaler;
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
const uint8_t* src);
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
// Export one row (starting at x_out position) from rescaler.
// 'Expand' corresponds to the wrk->y_expand case.
// Otherwise 'Shrink' is to be used
typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk);
extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
// Plain-C implementation, as fall-back.
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
// Main entry calls:
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
const uint8_t* src);
// Export one row (starting at x_out position) from rescaler.
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
// Must be called first before using the above.
void WebPRescalerDspInit(void);
//------------------------------------------------------------------------------
// Utilities for processing transparent channel.
// Pre-multiply planes with alpha values
// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
@ -399,98 +200,15 @@ extern void (*WebPApplyAlphaMultiply)(
extern void (*WebPApplyAlphaMultiply4444)(
uint8_t* rgba4444, int w, int h, int stride);
// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
// Returns true if alpha[] plane has non-trivial values different from 0xff.
extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride);
// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride);
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
// (this is the opposite of WebPDispatchAlpha).
// Returns true if there's only trivial 0xff alpha values.
extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride);
// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B).
// Un-Multiply operation transforms x into x * 255 / A.
// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
// Same as WebPMultARGBRow(), but for several rows.
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
int inverse);
// Same for a row of single values, with side alpha values.
extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
// Same as WebPMultRow(), but for several 'num_rows' rows.
void WebPMultRows(uint8_t* ptr, int stride,
const uint8_t* alpha, int alpha_stride,
int width, int num_rows, int inverse);
// Plain-C versions, used as fallback by some implementations.
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
// To be called first before using the above.
void WebPInitAlphaProcessing(void);
void WebPInitPremultiply(void);
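A minimal scalar sketch of the pre-multiply described above (x becomes x * A / 255); only the forward direction is shown and the rounding choice is illustrative, so the library's own hook may differ in detail. The helper name is hypothetical.

#include <stdint.h>

static void ExamplePremultiplyARGBRow(uint32_t* const ptr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t argb = ptr[x];
    const uint32_t a = argb >> 24;
    if (a != 0xff) {   // fully opaque pixels are left untouched
      const uint32_t r = (((argb >> 16) & 0xff) * a + 127) / 255;
      const uint32_t g = (((argb >>  8) & 0xff) * a + 127) / 255;
      const uint32_t b = (((argb >>  0) & 0xff) * a + 127) / 255;
      ptr[x] = (a << 24) | (r << 16) | (g << 8) | b;
    }
  }
}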
// ARGB packing function: a/r/g/b input is rgba or bgra order.
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
const uint8_t* g, const uint8_t* b, int len,
uint32_t* out);
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
// To be called first before using the above.
void VP8EncDspARGBInit(void);
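For illustration of the output word layout only (not the exact calling convention of the hooks above, whose channel pointers walk interleaved input): one a/r/g/b tuple packed into the 0xAARRGGBB layout assumed here. The helper name is hypothetical.

#include <stdint.h>

static uint32_t ExampleMakeARGB32(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
  return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}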
void WebPInitPremultiplySSE2(void); // should not be called directly.
void WebPInitPremultiplyNEON(void);
//------------------------------------------------------------------------------
// Filter functions
typedef enum { // Filter types.
WEBP_FILTER_NONE = 0,
WEBP_FILTER_HORIZONTAL,
WEBP_FILTER_VERTICAL,
WEBP_FILTER_GRADIENT,
WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker
WEBP_FILTER_BEST, // meta-types
WEBP_FILTER_FAST
} WEBP_FILTER_TYPE;
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int stride, uint8_t* out);
typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
int row, int num_rows, uint8_t* data);
// Filter the given data using the given predictor.
// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
// in raster order.
// 'stride' is number of bytes per scan line (with possible padding).
// 'out' should be pre-allocated.
extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
// In-place reconstruct the original data from the given filtered data.
// The reconstruction will be done for 'num_rows' rows starting from 'row'
// (assuming rows up to 'row - 1' are already reconstructed).
extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
// To be called first before using the above.
void VP8FiltersInit(void);
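A hedged sketch of the simplest of these predictors (horizontal: each byte is stored as the delta from its left neighbour). It only illustrates the filtering idea; the library's hooks also handle the first row/column special cases and the gradient predictor, and the helper name is hypothetical.

#include <stdint.h>

static void ExampleHorizontalFilter(const uint8_t* in, int width, int height,
                                    int stride, uint8_t* out) {
  int row, col;
  for (row = 0; row < height; ++row) {
    out[0] = in[0];                                  // no left neighbour: copy
    for (col = 1; col < width; ++col) {
      out[col] = (uint8_t)(in[col] - in[col - 1]);   // store the delta
    }
    in += stride;
    out += stride;
  }
}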
#ifdef __cplusplus
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -11,12 +11,14 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h> // for abs()
#include "./dsp.h"
#include "../enc/vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
@ -40,27 +42,10 @@ const int VP8DspScan[16 + 4 + 4] = {
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
// general-purpose util function
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
VP8Histogram* const histo) {
int max_value = 0, last_non_zero = 1;
int k;
for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
const int value = distribution[k];
if (value > 0) {
if (value > max_value) max_value = value;
last_non_zero = k;
}
}
histo->max_value = max_value;
histo->last_non_zero = last_non_zero;
}
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
int k;
int16_t out[16];
@ -71,10 +56,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
for (k = 0; k < 16; ++k) {
const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
++distribution[clipped_value];
histo->distribution[clipped_value]++;
}
}
VP8SetHistogramData(distribution, histo);
}
//------------------------------------------------------------------------------
@ -86,7 +70,7 @@ static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
static void InitTables(void) {
if (!tables_ok) {
int i;
for (i = -255; i <= 255 + 255; ++i) {
@ -177,14 +161,36 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
}
}
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
VP8FTransform(src, ref, out);
VP8FTransform(src + 4, ref + 4, out + 16);
static void ITransformWHT(const int16_t* in, int16_t* out) {
int tmp[16];
int i;
for (i = 0; i < 4; ++i) {
const int a0 = in[0 + i] + in[12 + i];
const int a1 = in[4 + i] + in[ 8 + i];
const int a2 = in[4 + i] - in[ 8 + i];
const int a3 = in[0 + i] - in[12 + i];
tmp[0 + i] = a0 + a1;
tmp[8 + i] = a0 - a1;
tmp[4 + i] = a3 + a2;
tmp[12 + i] = a3 - a2;
}
for (i = 0; i < 4; ++i) {
const int dc = tmp[0 + i * 4] + 3; // w/ rounder
const int a0 = dc + tmp[3 + i * 4];
const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
const int a3 = dc - tmp[3 + i * 4];
out[ 0] = (a0 + a1) >> 3;
out[16] = (a3 + a2) >> 3;
out[32] = (a0 - a1) >> 3;
out[48] = (a3 - a2) >> 3;
out += 64;
}
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
// input is 12b signed
int32_t tmp[16];
int16_t tmp[16];
int i;
for (i = 0; i < 4; ++i, in += 64) {
const int a0 = (in[0 * 16] + in[2 * 16]); // 13b
@ -218,6 +224,8 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
//------------------------------------------------------------------------------
// Intra predictions
#define DST(x, y) dst[(x) + (y) * BPS]
static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
int j;
for (j = 0; j < size; ++j) {
@ -228,7 +236,7 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
static WEBP_INLINE void VerticalPred(uint8_t* dst,
const uint8_t* top, int size) {
int j;
if (top != NULL) {
if (top) {
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
} else {
Fill(dst, 127, size);
@ -237,7 +245,7 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
const uint8_t* left, int size) {
if (left != NULL) {
if (left) {
int j;
for (j = 0; j < size; ++j) {
memset(dst + j * BPS, left[j], size);
@ -250,8 +258,8 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
int y;
if (left != NULL) {
if (top != NULL) {
if (left) {
if (top) {
const uint8_t* const clip = clip1 + 255 - left[-1];
for (y = 0; y < size; ++y) {
const uint8_t* const clip_table = clip + left[y];
@ -269,7 +277,7 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
// is equivalent to VE prediction where you just copy the top samples.
// Note that if top samples are not available, the default value is
// then 129, and not 127 as in the VerticalPred case.
if (top != NULL) {
if (top) {
VerticalPred(dst, top, size);
} else {
Fill(dst, 129, size);
@ -282,15 +290,15 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
int size, int round, int shift) {
int DC = 0;
int j;
if (top != NULL) {
if (top) {
for (j = 0; j < size; ++j) DC += top[j];
if (left != NULL) { // top and left present
if (left) { // top and left present
for (j = 0; j < size; ++j) DC += left[j];
} else { // top, but no left
DC += DC;
}
DC = (DC + round) >> shift;
} else if (left != NULL) { // left but no top
} else if (left) { // left but no top
for (j = 0; j < size; ++j) DC += left[j];
DC += DC;
DC = (DC + round) >> shift;
@ -312,8 +320,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
TrueMotion(C8TM8 + dst, left, top, 8);
// V block
dst += 8;
if (top != NULL) top += 8;
if (left != NULL) left += 16;
if (top) top += 8;
if (left) left += 16;
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
VerticalPred(C8VE8 + dst, top, 8);
HorizontalPred(C8HE8 + dst, left, 8);
@ -334,7 +342,6 @@ static void Intra16Preds(uint8_t* dst,
//------------------------------------------------------------------------------
// luma 4x4 prediction
#define DST(x, y) dst[(x) + (y) * BPS]
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
@ -357,10 +364,10 @@ static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
const int J = top[-3];
const int K = top[-4];
const int L = top[-5];
WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J));
WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K));
WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L));
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
*(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);
*(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);
*(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);
*(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);
}
static void DC4(uint8_t* dst, const uint8_t* top) {
@ -622,57 +629,21 @@ static const uint8_t kZigzag[16] = {
// Simple quantization
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int n, const VP8Matrix* const mtx) {
int last = -1;
int n;
for (n = 0; n < 16; ++n) {
for (; n < 16; ++n) {
const int j = kZigzag[n];
const int sign = (in[j] < 0);
const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > mtx->zthresh_[j]) {
const uint32_t Q = mtx->q_[j];
const uint32_t iQ = mtx->iq_[j];
const uint32_t B = mtx->bias_[j];
int level = QUANTDIV(coeff, iQ, B);
if (level > MAX_LEVEL) level = MAX_LEVEL;
if (sign) level = -level;
in[j] = level * Q;
out[n] = level;
if (level) last = n;
} else {
out[n] = 0;
in[j] = 0;
}
}
return (last >= 0);
}
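For readers without the encoder headers at hand: QUANTDIV above is the fixed-point division used by the quantizer. Assuming its usual definition (the SSE code further down carries the matching "out = (coeff * iQ + B) >> QFIX" comment with QFIX = 17), it reduces to the sketch below; the helper name is hypothetical.

#include <stdint.h>

static int ExampleQuantDiv(uint32_t coeff, uint32_t iQ, uint32_t bias) {
  return (int)((coeff * iQ + bias) >> 17);   // level ~= coeff / Q, QFIX == 17
}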
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int n, last = -1;
for (n = 0; n < 16; ++n) {
const int j = kZigzag[n];
const int sign = (in[j] < 0);
const uint32_t coeff = sign ? -in[j] : in[j];
assert(mtx->sharpen_[j] == 0);
if (coeff > mtx->zthresh_[j]) {
const uint32_t Q = mtx->q_[j];
const uint32_t iQ = mtx->iq_[j];
const uint32_t B = mtx->bias_[j];
int level = QUANTDIV(coeff, iQ, B);
if (level > MAX_LEVEL) level = MAX_LEVEL;
if (sign) level = -level;
in[j] = level * Q;
out[n] = level;
if (level) last = n;
const int Q = mtx->q_[j];
const int iQ = mtx->iq_[j];
const int B = mtx->bias_[j];
out[n] = QUANTDIV(coeff, iQ, B);
if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL;
if (sign) out[n] = -out[n];
in[j] = out[n] * Q;
if (out[n]) last = n;
} else {
out[n] = 0;
in[j] = 0;
@ -684,22 +655,16 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
//------------------------------------------------------------------------------
// Block copy
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
int y;
for (y = 0; y < h; ++y) {
memcpy(dst, src, w);
for (y = 0; y < size; ++y) {
memcpy(dst, src, size);
src += BPS;
dst += BPS;
}
}
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 4, 4);
}
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 16, 8);
}
static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
//------------------------------------------------------------------------------
// Initialization
@ -709,7 +674,7 @@ static void Copy16x8(const uint8_t* src, uint8_t* dst) {
VP8CHisto VP8CollectHistogram;
VP8Idct VP8ITransform;
VP8Fdct VP8FTransform;
VP8Fdct VP8FTransform2;
VP8WHT VP8ITransformWHT;
VP8WHT VP8FTransformWHT;
VP8Intra4Preds VP8EncPredLuma4;
VP8IntraPreds VP8EncPredLuma16;
@ -721,32 +686,19 @@ VP8Metric VP8SSE4x4;
VP8WMetric VP8TDisto4x4;
VP8WMetric VP8TDisto16x16;
VP8QuantizeBlock VP8EncQuantizeBlock;
VP8Quantize2Blocks VP8EncQuantize2Blocks;
VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
VP8BlockCopy VP8Copy4x4;
VP8BlockCopy VP8Copy16x8;
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitSSE41(void);
extern void VP8EncDspInitAVX2(void);
extern void VP8EncDspInitNEON(void);
extern void VP8EncDspInitMIPS32(void);
extern void VP8EncDspInitMIPSdspR2(void);
static volatile VP8CPUInfo enc_last_cpuinfo_used =
(VP8CPUInfo)&enc_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
if (enc_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8DspInit(); // common inverse transforms
void VP8EncDspInit(void) {
InitTables();
// default C implementations
VP8CollectHistogram = CollectHistogram;
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8FTransform2 = FTransform2;
VP8ITransformWHT = ITransformWHT;
VP8FTransformWHT = FTransformWHT;
VP8EncPredLuma4 = Intra4Preds;
VP8EncPredLuma16 = Intra16Preds;
@ -758,43 +710,22 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8Copy4x4 = Copy4x4;
VP8Copy16x8 = Copy16x8;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
if (VP8GetCPUInfo) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspInitSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8EncDspInitSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_AVX2)
if (VP8GetCPUInfo(kAVX2)) {
VP8EncDspInitAVX2();
}
#endif
#if defined(WEBP_USE_NEON)
#elif defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspInitMIPS32();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8EncDspInitMIPSdspR2();
}
#endif
}
enc_last_cpuinfo_used = VP8GetCPUInfo;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,21 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// AVX2 version of speed-critical encoding functions.
#include "./dsp.h"
#if defined(WEBP_USE_AVX2)
#endif // WEBP_USE_AVX2
//------------------------------------------------------------------------------
// Entry point
WEBP_DSP_INIT_STUB(VP8EncDspInitAVX2)

View File

@ -1,672 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of speed-critical encoding functions.
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
// Slobodan Prijic (slobodan.prijic@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "./mips_macro.h"
#include "../enc/vp8enci.h"
#include "../enc/cost.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
// macro for one vertical pass in ITransformOne
// MUL macro inlined
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to load from in buffer
// TEMP0..TEMP3 - registers for corresponding tmp elements
// TEMP4..TEMP5 - temporary registers
#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
"lh %[temp16], " #A "(%[temp20]) \n\t" \
"lh %[temp18], " #B "(%[temp20]) \n\t" \
"lh %[temp17], " #C "(%[temp20]) \n\t" \
"lh %[temp19], " #D "(%[temp20]) \n\t" \
"addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
"subu %[temp16], %[temp16], %[temp18] \n\t" \
"mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
"mul %[temp18], %[temp19], %[kC1] \n\t" \
"mul %[temp17], %[temp17], %[kC1] \n\t" \
"mul %[temp19], %[temp19], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
"sra %[temp18], %[temp18], 16 \n\n" \
"sra %[temp17], %[temp17], 16 \n\n" \
"sra %[temp19], %[temp19], 16 \n\n" \
"subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
"addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
"addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
"addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
// macro for one horizontal pass in ITransformOne
// MUL and STORE macros inlined
// a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255)
// temp0..temp15 holds tmp[0]..tmp[15]
// A - offset in bytes to load from ref and store to dst buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
"addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
"mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
"mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
"mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"lw %[temp20], 0(%[args]) \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
"lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp17], 1+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp18], 2+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
"addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
"addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
"slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
"slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
"slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
"slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
"movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
"movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
"movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
"movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
"addiu %[temp20], $zero, 255 \n\t" \
"slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
"slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
"slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
"slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
"movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
"movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
"lw %[temp16], 8(%[args]) \n\t" \
"movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
"movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
"sb %[" #TEMP0 "], 0+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP4 "], 1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], 2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
__asm__ volatile(
"lw %[temp20], 4(%[args]) \n\t"
VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3)
VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7)
VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)
VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)
HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)
HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
[temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
: [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
: "memory", "hi", "lo"
);
}
static void ITransform(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS
// macro for one pass through for loop in QuantizeBlock
// QUANTDIV macro inlined
// J - offset in bytes (kZigzag[n] * 2)
// K - offset in bytes (kZigzag[n] * 4)
// N - offset in bytes (n * 2)
#define QUANTIZE_ONE(J, K, N) \
"lh %[temp0], " #J "(%[ppin]) \n\t" \
"lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
"lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
"sra %[sign], %[temp0], 15 \n\t" \
"xor %[coeff], %[temp0], %[sign] \n\t" \
"subu %[coeff], %[coeff], %[sign] \n\t" \
"addu %[coeff], %[coeff], %[temp1] \n\t" \
"slt %[temp4], %[temp2], %[coeff] \n\t" \
"addiu %[temp5], $zero, 0 \n\t" \
"addiu %[level], $zero, 0 \n\t" \
"beqz %[temp4], 2f \n\t" \
"lhu %[temp1], " #J "(%[ppiq]) \n\t" \
"lw %[temp2], " #K "(%[ppbias]) \n\t" \
"lhu %[temp3], " #J "(%[ppq]) \n\t" \
"mul %[level], %[coeff], %[temp1] \n\t" \
"addu %[level], %[level], %[temp2] \n\t" \
"sra %[level], %[level], 17 \n\t" \
"slt %[temp4], %[max_level], %[level] \n\t" \
"movn %[level], %[max_level], %[temp4] \n\t" \
"xor %[level], %[level], %[sign] \n\t" \
"subu %[level], %[level], %[sign] \n\t" \
"mul %[temp5], %[level], %[temp3] \n\t" \
"2: \n\t" \
"sh %[temp5], " #J "(%[ppin]) \n\t" \
"sh %[level], " #N "(%[pout]) \n\t"
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int temp0, temp1, temp2, temp3, temp4, temp5;
int sign, coeff, level, i;
int max_level = MAX_LEVEL;
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
const uint16_t* ppsharpen = &mtx->sharpen_[0];
const uint32_t* ppzthresh = &mtx->zthresh_[0];
const uint16_t* ppq = &mtx->q_[0];
const uint16_t* ppiq = &mtx->iq_[0];
const uint32_t* ppbias = &mtx->bias_[0];
__asm__ volatile(
QUANTIZE_ONE( 0, 0, 0)
QUANTIZE_ONE( 2, 4, 2)
QUANTIZE_ONE( 8, 16, 4)
QUANTIZE_ONE(16, 32, 6)
QUANTIZE_ONE(10, 20, 8)
QUANTIZE_ONE( 4, 8, 10)
QUANTIZE_ONE( 6, 12, 12)
QUANTIZE_ONE(12, 24, 14)
QUANTIZE_ONE(18, 36, 16)
QUANTIZE_ONE(24, 48, 18)
QUANTIZE_ONE(26, 52, 20)
QUANTIZE_ONE(20, 40, 22)
QUANTIZE_ONE(14, 28, 24)
QUANTIZE_ONE(22, 44, 26)
QUANTIZE_ONE(28, 56, 28)
QUANTIZE_ONE(30, 60, 30)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[sign]"=&r"(sign), [coeff]"=&r"(coeff),
[level]"=&r"(level)
: [pout]"r"(pout), [ppin]"r"(ppin),
[ppiq]"r"(ppiq), [max_level]"r"(max_level),
[ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
[ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
: "memory", "hi", "lo"
);
// moved out of the macro to allow breaking out of the loop earlier
for (i = 15; i >= 0; i--) {
if (out[i]) return 1;
}
return 0;
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
#undef QUANTIZE_ONE
// macro for one horizontal pass in Disto4x4 (TTransform)
// two calls of the TTransform function are merged into a single one
// A - offset in bytes to load from a and b buffers
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
"lbu %[temp0], 0+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp1], 1+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp2], 2+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp3], 3+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp4], 0+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp5], 1+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp6], 2+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"lbu %[temp7], 3+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
"addu %[temp8], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
"subu %[temp1], %[temp1], %[temp3] \n\t" \
"addu %[temp3], %[temp4], %[temp6] \n\t" \
"subu %[temp4], %[temp4], %[temp6] \n\t" \
"addu %[temp6], %[temp5], %[temp7] \n\t" \
"subu %[temp5], %[temp5], %[temp7] \n\t" \
"addu %[temp7], %[temp8], %[temp2] \n\t" \
"subu %[temp2], %[temp8], %[temp2] \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp3], %[temp6] \n\t" \
"subu %[temp3], %[temp3], %[temp6] \n\t" \
"addu %[temp6], %[temp4], %[temp5] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \
"sw %[temp7], " #E "(%[tmp]) \n\t" \
"sw %[temp2], " #H "(%[tmp]) \n\t" \
"sw %[temp8], " #F "(%[tmp]) \n\t" \
"sw %[temp0], " #G "(%[tmp]) \n\t" \
"sw %[temp1], " #E1 "(%[tmp]) \n\t" \
"sw %[temp3], " #H1 "(%[tmp]) \n\t" \
"sw %[temp6], " #F1 "(%[tmp]) \n\t" \
"sw %[temp4], " #G1 "(%[tmp]) \n\t"
// macro for one vertical pass in Disto4x4 (TTransform)
// two calls of function TTransform are merged into single one
// since only one accu is available in mips32r1 instruction set
// first is done second call of function TTransform and after
// that first one.
// const int sum1 = TTransform(a, w);
// const int sum2 = TTransform(b, w);
// return abs(sum2 - sum1) >> 5;
// (sum2 - sum1) is calculated with madds (sub2) and msubs (sub1)
// A..D - offsets in bytes to load first results from tmp buffer
// A1..D1 - offsets in bytes to load second results from tmp buffer
// E..H - offsets in bytes to load from w buffer
#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
"lw %[temp0], " #A1 "(%[tmp]) \n\t" \
"lw %[temp1], " #C1 "(%[tmp]) \n\t" \
"lw %[temp2], " #B1 "(%[tmp]) \n\t" \
"lw %[temp3], " #D1 "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
"subu %[temp2], %[temp2], %[temp3] \n\t" \
"addu %[temp3], %[temp8], %[temp1] \n\t" \
"subu %[temp8], %[temp8], %[temp1] \n\t" \
"addu %[temp1], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"sra %[temp4], %[temp3], 31 \n\t" \
"sra %[temp5], %[temp1], 31 \n\t" \
"sra %[temp6], %[temp0], 31 \n\t" \
"sra %[temp7], %[temp8], 31 \n\t" \
"xor %[temp3], %[temp3], %[temp4] \n\t" \
"xor %[temp1], %[temp1], %[temp5] \n\t" \
"xor %[temp0], %[temp0], %[temp6] \n\t" \
"xor %[temp8], %[temp8], %[temp7] \n\t" \
"subu %[temp3], %[temp3], %[temp4] \n\t" \
"subu %[temp1], %[temp1], %[temp5] \n\t" \
"subu %[temp0], %[temp0], %[temp6] \n\t" \
"subu %[temp8], %[temp8], %[temp7] \n\t" \
"lhu %[temp4], " #E "(%[w]) \n\t" \
"lhu %[temp5], " #F "(%[w]) \n\t" \
"lhu %[temp6], " #G "(%[w]) \n\t" \
"lhu %[temp7], " #H "(%[w]) \n\t" \
"madd %[temp4], %[temp3] \n\t" \
"madd %[temp5], %[temp1] \n\t" \
"madd %[temp6], %[temp0] \n\t" \
"madd %[temp7], %[temp8] \n\t" \
"lw %[temp0], " #A "(%[tmp]) \n\t" \
"lw %[temp1], " #C "(%[tmp]) \n\t" \
"lw %[temp2], " #B "(%[tmp]) \n\t" \
"lw %[temp3], " #D "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
"subu %[temp2], %[temp2], %[temp3] \n\t" \
"addu %[temp3], %[temp8], %[temp1] \n\t" \
"subu %[temp1], %[temp8], %[temp1] \n\t" \
"addu %[temp8], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"sra %[temp2], %[temp3], 31 \n\t" \
"xor %[temp3], %[temp3], %[temp2] \n\t" \
"subu %[temp3], %[temp3], %[temp2] \n\t" \
"msub %[temp4], %[temp3] \n\t" \
"sra %[temp2], %[temp8], 31 \n\t" \
"sra %[temp3], %[temp0], 31 \n\t" \
"sra %[temp4], %[temp1], 31 \n\t" \
"xor %[temp8], %[temp8], %[temp2] \n\t" \
"xor %[temp0], %[temp0], %[temp3] \n\t" \
"xor %[temp1], %[temp1], %[temp4] \n\t" \
"subu %[temp8], %[temp8], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp3] \n\t" \
"subu %[temp1], %[temp1], %[temp4] \n\t" \
"msub %[temp5], %[temp8] \n\t" \
"msub %[temp6], %[temp0] \n\t" \
"msub %[temp7], %[temp1] \n\t"
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int tmp[32];
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
__asm__ volatile(
HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76)
HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92)
HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108)
HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124)
"mthi $zero \n\t"
"mtlo $zero \n\t"
VERTICAL_PASS( 0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24)
VERTICAL_PASS( 4, 20, 36, 52, 68, 84, 100, 116, 2, 10, 18, 26)
VERTICAL_PASS( 8, 24, 40, 56, 72, 88, 104, 120, 4, 12, 20, 28)
VERTICAL_PASS(12, 28, 44, 60, 76, 92, 108, 124, 6, 14, 22, 30)
"mflo %[temp0] \n\t"
"sra %[temp1], %[temp0], 31 \n\t"
"xor %[temp0], %[temp0], %[temp1] \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"sra %[temp0], %[temp0], 5 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
: "memory", "hi", "lo"
);
return temp0;
}
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
}
}
return D;
}
// macro for one horizontal pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
// A - offset in bytes to load from src and ref buffers
// TEMP0..TEMP3 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP1 "], 0(%[args]) \n\t" \
"lw %[" #TEMP2 "], 4(%[args]) \n\t" \
"lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 0+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], 1+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 1+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], 2+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], 2+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], 3+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
"subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
"addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
"addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
"mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
"mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
"mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
"addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
"subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
"sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
"sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
"addiu %[temp16], %[temp16], 1812 \n\t" \
"addiu %[temp17], %[temp17], 937 \n\t" \
"addu %[temp16], %[temp16], %[temp19] \n\t" \
"subu %[temp17], %[temp17], %[temp18] \n\t" \
"sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
"sra %[" #TEMP3 "], %[temp17], 9 \n\t"
// macro for one vertical pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to store to out buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
"mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
"mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
"mul %[temp18], %[temp18], %[c5352] \n\t" \
"addiu %[temp16], %[temp16], 7 \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
"subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
"addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
"subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
"movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
"sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
int temp17, temp18, temp19, temp20;
const int c2217 = 2217;
const int c5352 = 5352;
const int* const args[3] =
{ (const int*)src, (const int*)ref, (const int*)out };
__asm__ volatile(
HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3)
HORIZONTAL_PASS(1, temp4, temp5, temp6, temp7)
HORIZONTAL_PASS(2, temp8, temp9, temp10, temp11)
HORIZONTAL_PASS(3, temp12, temp13, temp14, temp15)
"lw %[temp20], 8(%[args]) \n\t"
VERTICAL_PASS(0, 8, 16, 24, temp0, temp4, temp8, temp12)
VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9, temp13)
VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
[temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
: [args]"r"(args), [c2217]"r"(c2217), [c5352]"r"(c5352)
: "memory", "hi", "lo"
);
}
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS
#if !defined(WORK_AROUND_GCC)
#define GET_SSE_INNER(A, B, C, D) \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #A "(%[b]) \n\t" \
"lbu %[temp2], " #B "(%[a]) \n\t" \
"lbu %[temp3], " #B "(%[b]) \n\t" \
"lbu %[temp4], " #C "(%[a]) \n\t" \
"lbu %[temp5], " #C "(%[b]) \n\t" \
"lbu %[temp6], " #D "(%[a]) \n\t" \
"lbu %[temp7], " #D "(%[b]) \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"subu %[temp2], %[temp2], %[temp3] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \
"subu %[temp6], %[temp6], %[temp7] \n\t" \
"madd %[temp0], %[temp0] \n\t" \
"madd %[temp2], %[temp2] \n\t" \
"madd %[temp4], %[temp4] \n\t" \
"madd %[temp6], %[temp6] \n\t"
#define GET_SSE(A, B, C, D) \
GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
GET_SSE_INNER(B, B + 1, B + 2, B + 3) \
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
GET_SSE( 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
GET_SSE( 9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
return count;
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
return count;
}
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
return count;
}
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
return count;
}
#undef GET_SSE
#undef GET_SSE_INNER
#endif // !WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
#if !defined(WORK_AROUND_GCC)
VP8SSE16x16 = SSE16x16;
VP8SSE8x8 = SSE8x8;
VP8SSE16x8 = SSE16x8;
VP8SSE4x4 = SSE4x4;
#endif
}
#else // !WEBP_USE_MIPS32
WEBP_DSP_INIT_STUB(VP8EncDspInitMIPS32)
#endif // WEBP_USE_MIPS32

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,373 +0,0 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some encoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include <stdlib.h> // for abs()
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms.
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
int k;
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
{
// Load.
const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
// v = abs(out) >> 3
const __m128i abs0 = _mm_abs_epi16(out0);
const __m128i abs1 = _mm_abs_epi16(out1);
const __m128i v0 = _mm_srai_epi16(abs0, 3);
const __m128i v1 = _mm_srai_epi16(abs1, 3);
// bin = min(v, MAX_COEFF_THRESH)
const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
// Store.
_mm_storeu_si128((__m128i*)&out[0], bin0);
_mm_storeu_si128((__m128i*)&out[8], bin1);
}
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
++distribution[out[k]];
}
}
VP8SetHistogramData(distribution, histo);
}
//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
// Hadamard transform
// Returns the difference between the weighted sum of the absolute value of
// transformed coefficients.
static int TTransform(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
// Load, combine and transpose inputs.
{
const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]);
const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
// Combine inA and inB (we'll do two transforms in parallel).
const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
// a00 b00 a01 b01 a02 b02 a03 b03 0 0 0 0 0 0 0 0
// a10 b10 a11 b11 a12 b12 a13 b13 0 0 0 0 0 0 0 0
// a20 b20 a21 b21 a22 b22 a23 b23 0 0 0 0 0 0 0 0
// a30 b30 a31 b31 a32 b32 a33 b33 0 0 0 0 0 0 0 0
// Transpose the two 4x4, discarding the filling zeroes.
const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
// a00 a20 b00 b20 a01 a21 b01 b21 a02 a22 b02 b22 a03 a23 b03 b23
// a10 a30 b10 b30 a11 a31 b11 b31 a12 a32 b12 b32 a13 a33 b13 b33
const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
// a00 a10 a20 a30 b00 b10 b20 b30 a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32 a03 a13 a23 a33 b03 b13 b23 b33
// Convert to 16b.
tmp_0 = _mm_cvtepu8_epi16(transpose1_0);
tmp_1 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_0, 8));
tmp_2 = _mm_cvtepu8_epi16(transpose1_1);
tmp_3 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_1, 8));
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
// Horizontal pass and subsequent transpose.
{
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
const __m128i b0 = _mm_add_epi16(a0, a1);
const __m128i b1 = _mm_add_epi16(a3, a2);
const __m128i b2 = _mm_sub_epi16(a3, a2);
const __m128i b3 = _mm_sub_epi16(a0, a1);
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
// Transpose the two 4x4.
const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 b22 b32 b03 b13 b23 b33
tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
// Vertical pass and difference of weighted sums.
{
// Load all inputs.
const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]);
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
const __m128i b0 = _mm_add_epi16(a0, a1);
const __m128i b1 = _mm_add_epi16(a3, a2);
const __m128i b2 = _mm_sub_epi16(a3, a2);
const __m128i b3 = _mm_sub_epi16(a0, a1);
// Separate the transforms of inA and inB.
__m128i A_b0 = _mm_unpacklo_epi64(b0, b1);
__m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
__m128i B_b0 = _mm_unpackhi_epi64(b0, b1);
__m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
A_b0 = _mm_abs_epi16(A_b0);
A_b2 = _mm_abs_epi16(A_b2);
B_b0 = _mm_abs_epi16(B_b0);
B_b2 = _mm_abs_epi16(B_b2);
// weighted sums
A_b0 = _mm_madd_epi16(A_b0, w_0);
A_b2 = _mm_madd_epi16(A_b2, w_8);
B_b0 = _mm_madd_epi16(B_b0, w_0);
B_b2 = _mm_madd_epi16(B_b2, w_8);
A_b0 = _mm_add_epi32(A_b0, A_b2);
B_b0 = _mm_add_epi32(B_b0, B_b2);
// difference of weighted sums
A_b2 = _mm_sub_epi32(A_b0, B_b0);
// cascading summation of the differences
B_b0 = _mm_hadd_epi32(A_b2, A_b2);
B_b2 = _mm_hadd_epi32(B_b0, B_b0);
return _mm_cvtsi128_si32(B_b2);
}
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransform(a, b, w);
return abs(diff_sum) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
}
}
return D;
}
//------------------------------------------------------------------------------
// Quantization
//
// Generates a pshufb constant for shuffling 16b words.
#define PSHUFB_CST(A,B,C,D,E,F,G,H) \
_mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, \
2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, 2 * (E) + 0, \
2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
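// Editor's note (added comment): word lane 0..7 of the resulting constant
// picks source 16-bit word A..H respectively, so PSHUFB_CST(0,1,2,3,4,5,6,7)
// is the identity shuffle on eight 16-bit words. A negative argument such as
// -1 expands to the control bytes 0xfe/0xff, whose set high bit makes pshufb
// write zero into that lane; that is how the [7] and [8] coefficients are
// blanked in kCst_lo/kCst_hi below and OR-ed back in from the other register.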
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
const uint16_t* const sharpen,
const VP8Matrix* const mtx) {
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
const __m128i zero = _mm_setzero_si128();
__m128i out0, out8;
__m128i packed_out;
// Load all inputs.
__m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
__m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
// coeff = abs(in)
__m128i coeff0 = _mm_abs_epi16(in0);
__m128i coeff8 = _mm_abs_epi16(in8);
// coeff = abs(in) + sharpen
if (sharpen != NULL) {
const __m128i sharpen0 = _mm_loadu_si128((const __m128i*)&sharpen[0]);
const __m128i sharpen8 = _mm_loadu_si128((const __m128i*)&sharpen[8]);
coeff0 = _mm_add_epi16(coeff0, sharpen0);
coeff8 = _mm_add_epi16(coeff8, sharpen8);
}
// out = (coeff * iQ + B) >> QFIX
{
// doing calculations with 32b precision (QFIX=17)
// out = (coeff * iQ)
const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
__m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H);
__m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
__m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
__m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
// out = (coeff * iQ + B)
const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
out_00 = _mm_add_epi32(out_00, bias_00);
out_04 = _mm_add_epi32(out_04, bias_04);
out_08 = _mm_add_epi32(out_08, bias_08);
out_12 = _mm_add_epi32(out_12, bias_12);
// out = QUANTDIV(coeff, iQ, B, QFIX)
out_00 = _mm_srai_epi32(out_00, QFIX);
out_04 = _mm_srai_epi32(out_04, QFIX);
out_08 = _mm_srai_epi32(out_08, QFIX);
out_12 = _mm_srai_epi32(out_12, QFIX);
// pack result as 16b
out0 = _mm_packs_epi32(out_00, out_04);
out8 = _mm_packs_epi32(out_08, out_12);
// if (coeff > 2047) coeff = 2047
out0 = _mm_min_epi16(out0, max_coeff_2047);
out8 = _mm_min_epi16(out8, max_coeff_2047);
}
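// Editor's note (added comment, assuming the usual libwebp convention that
// iq_[j] is roughly (1 << QFIX) / q_[j] and bias_[j] is a rounding bias):
// the fixed-point expression above approximates level = coeff / q_[j].
// For example, with q_ = 20 we get iq_ = (1 << 17) / 20 = 6553, and a
// coefficient of 75 quantizes to (75 * 6553) >> 17 = 3 before the bias is
// applied (75 / 20 = 3.75).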
// put sign back
out0 = _mm_sign_epi16(out0, in0);
out8 = _mm_sign_epi16(out8, in8);
// in = out * Q
in0 = _mm_mullo_epi16(out0, q0);
in8 = _mm_mullo_epi16(out8, q8);
_mm_storeu_si128((__m128i*)&in[0], in0);
_mm_storeu_si128((__m128i*)&in[8], in8);
// zigzag the output before storing it. The re-ordering is:
//    0 1 2 3   4 5 6 7 | 8  9 10 11  12 13 14 15
// -> 0 1 4 [8] 5 2 3 6 | 9 12 13 10 [7] 11 14 15
// There are only two misplaced entries ([8] and [7]) that cross the
// register boundary.
// We use pshufb instead of pshuflw/pshufhw.
{
const __m128i kCst_lo = PSHUFB_CST(0, 1, 4, -1, 5, 2, 3, 6);
const __m128i kCst_7 = PSHUFB_CST(-1, -1, -1, -1, 7, -1, -1, -1);
const __m128i tmp_lo = _mm_shuffle_epi8(out0, kCst_lo);
const __m128i tmp_7 = _mm_shuffle_epi8(out0, kCst_7); // extract #7
const __m128i kCst_hi = PSHUFB_CST(1, 4, 5, 2, -1, 3, 6, 7);
const __m128i kCst_8 = PSHUFB_CST(-1, -1, -1, 0, -1, -1, -1, -1);
const __m128i tmp_hi = _mm_shuffle_epi8(out8, kCst_hi);
const __m128i tmp_8 = _mm_shuffle_epi8(out8, kCst_8); // extract #8
const __m128i out_z0 = _mm_or_si128(tmp_lo, tmp_8);
const __m128i out_z8 = _mm_or_si128(tmp_hi, tmp_7);
_mm_storeu_si128((__m128i*)&out[0], out_z0);
_mm_storeu_si128((__m128i*)&out[8], out_z8);
packed_out = _mm_packs_epi16(out_z0, out_z8);
}
// detect if all 'out' values are zeroes or not
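// Editor's note (added comment): packed_out holds the 16 levels saturated to
// 8 bits, _mm_cmpeq_epi8 turns every zero level into 0xff, and
// _mm_movemask_epi8 collects the 16 byte sign bits. The mask equals 0xffff
// only when all levels are zero, so the expression below yields 1 when the
// block has at least one non-zero coefficient.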
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
}
#undef PSHUFB_CST
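// Editor's note: an illustrative, hypothetical plain-C sketch (not part of the
// original file) of the per-coefficient work DoQuantizeBlock performs above:
// quantize with a rounding bias, clamp to MAX_LEVEL, restore the sign, write
// the dequantized value back to 'in', and emit the levels in zigzag order.
// The VP8Matrix field names (iq_, q_, bias_) and the QFIX/MAX_LEVEL constants
// are taken from the code above; 'QuantizeBlock_Sketch' and 'kZigzagSketch'
// are invented names, with the table matching the re-ordering described in
// the zigzag comment. The real scalar code in the library may differ in
// details.
static int QuantizeBlock_Sketch(int16_t in[16], int16_t out[16],
                                const uint16_t* const sharpen,
                                const VP8Matrix* const mtx) {
  static const uint8_t kZigzagSketch[16] = {
    0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
  };
  int n, nz = 0;
  for (n = 0; n < 16; ++n) {
    const int j = kZigzagSketch[n];
    const int sign = (in[j] < 0);
    uint32_t coeff = sign ? -in[j] : in[j];
    int level;
    if (sharpen != NULL) coeff += sharpen[j];
    level = (int)((coeff * mtx->iq_[j] + mtx->bias_[j]) >> QFIX);
    if (level > MAX_LEVEL) level = MAX_LEVEL;  // clamp to 2047
    if (sign) level = -level;                  // put sign back
    in[j] = (int16_t)(level * mtx->q_[j]);     // dequantized value, as above
    out[n] = (int16_t)level;                   // zigzag-ordered output
    nz |= (level != 0);
  }
  return nz;  // 1 if the block has any non-zero coefficient
}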
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
}
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, NULL, mtx);
}
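// Editor's note (added comment): Quantize2Blocks returns a 2-bit mask with one
// "has non-zero coefficients" flag per 16-coefficient block: bit 0 for the
// first block, bit 1 for the second.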
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen_[0];
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
VP8CollectHistogram = CollectHistogram;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
}
#else // !WEBP_USE_SSE41
WEBP_DSP_INIT_STUB(VP8EncDspInitSSE41)
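// Editor's note (added comment, reflecting the usual libwebp dsp convention):
// when WEBP_USE_SSE41 is not defined, WEBP_DSP_INIT_STUB is expected to expand
// to an empty VP8EncDspInitSSE41() definition, so the generic dispatcher can
// reference the symbol unconditionally regardless of the target CPU.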
#endif // WEBP_USE_SSE41

Some files were not shown because too many files have changed in this diff.