rescaler_mips32: disable ImportRowShrink

This function is failing the 'accum == 0' assert on the Skia bots when
rescaling to 13x13.

BUG=skia:6682

Change-Id: I9f9f3adf28cec63ad6e38ed3128f18825d5b70cc
2026-04-09 22:30:02 +02:00 · 2017-06-02 19:58:33 -07:00
222 changed files with 5982 additions and 10796 deletions
--- a/Android.mk
+++ b/Android.mk
@@ -11,24 +11,12 @@ ifeq ($(APP_OPTIM),release)
  endif
 endif

-# mips32 fails to build with clang from r14b
-# https://bugs.chromium.org/p/webp/issues/detail?id=343
-ifeq ($(findstring clang,$(NDK_TOOLCHAIN_VERSION)),clang)
-  ifeq ($(TARGET_ARCH),mips)
-    clang_version := $(shell $(TARGET_CC) --version)
-    ifneq ($(findstring clang version 3,$(clang_version)),)
-      WEBP_CFLAGS += -no-integrated-as
-    endif
-  endif
-endif
-
 ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
  # Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
  # instructions to be generated for armv7a code. Instead target the neon code
  # specifically.
  NEON := c.neon
  USE_CPUFEATURES := yes
-  WEBP_CFLAGS += -DHAVE_CPU_FEATURES_H
 else
  NEON := c
 endif
@@ -55,6 +43,9 @@ dsp_dec_srcs := \
    src/dsp/alpha_processing_neon.$(NEON) \
    src/dsp/alpha_processing_sse2.c \
    src/dsp/alpha_processing_sse41.c \
+    src/dsp/argb.c \
+    src/dsp/argb_mips_dsp_r2.c \
+    src/dsp/argb_sse2.c \
    src/dsp/cpu.c \
    src/dsp/dec.c \
    src/dsp/dec_clip_tables.c \
@@ -88,7 +79,6 @@ dsp_dec_srcs := \
    src/dsp/yuv.c \
    src/dsp/yuv_mips32.c \
    src/dsp/yuv_mips_dsp_r2.c \
-    src/dsp/yuv_neon.$(NEON) \
    src/dsp/yuv_sse2.c \

 dsp_enc_srcs := \
@@ -111,13 +101,10 @@ dsp_enc_srcs := \
    src/dsp/lossless_enc_neon.$(NEON) \
    src/dsp/lossless_enc_sse2.c \
    src/dsp/lossless_enc_sse41.c \
-    src/dsp/ssim.c \
-    src/dsp/ssim_sse2.c \

 enc_srcs := \
    src/enc/alpha_enc.c \
    src/enc/analysis_enc.c \
-    src/enc/backward_references_cost_enc.c \
    src/enc/backward_references_enc.c \
    src/enc/config_enc.c \
    src/enc/cost_enc.c \
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,21 +3,13 @@ cmake_minimum_required(VERSION 2.8.7)
 project(libwebp C)

 # Options for coder / decoder executables.
-option(WEBP_ENABLE_SIMD "Enable any SIMD optimization." ON)
 option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." OFF)
 option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." OFF)
 option(WEBP_BUILD_GIF2WEBP "Build the gif2webp conversion tool." OFF)
 option(WEBP_BUILD_IMG2WEBP "Build the img2webp animation tool." OFF)
-option(WEBP_BUILD_WEBPINFO "Build the webpinfo command line tool." OFF)
-option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
-option(WEBP_ENABLE_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
 option(WEBP_EXPERIMENTAL_FEATURES "Build with experimental features." OFF)
 option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces." OFF)

-if(WEBP_BUILD_WEBP_JS)
-  set(WEBP_ENABLE_SIMD OFF)
-endif()
-
 set(WEBP_DEP_LIBRARIES)
 set(WEBP_DEP_INCLUDE_DIRS)

@@ -27,14 +19,12 @@ if(NOT CMAKE_BUILD_TYPE)
  )
 endif()

-# Include dependencies.
-include(cmake/deps.cmake)
-include(GNUInstallDirs)
+include(cmake/config.h.cmake)

 ################################################################################
 # Options.
 if(WEBP_ENABLE_SWAP_16BIT_CSP)
-  add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
+  add_definitions(-DWEBP_SWAP_16BIT_CSP)
 endif()

 ################################################################################
@@ -49,148 +39,48 @@ if(ANDROID)
  set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS}
    ${ANDROID_NDK}/sources/android/cpufeatures
  )
-  add_definitions(-DHAVE_CPU_FEATURES_H=1)
-  set(HAVE_CPU_FEATURES_H 1)
-else()
-  set(HAVE_CPU_FEATURES_H 0)
 endif()

 ################################################################################
 # WebP source files.
 # Read the Makefile.am to get the source files.

-# We expect the Makefiles to define the sources as defined in
-# the first regex. E.g.:
-# libimagedec_la_SOURCES  = image_dec.c image_dec.h
-function(parse_Makefile_am FOLDER VAR SRC_REGEX)
+function(parse_Makefile_am FOLDER VAR)
  file(READ ${FOLDER}/Makefile.am MAKEFILE_AM)
-  string(REGEX MATCHALL "${SRC_REGEX}_SOURCES[ ]*\\+?=[ ]+[0-9a-z\\._ ]*"
+  string(REGEX MATCHALL "_SOURCES \\+= [^\n]*"
    FILES_PER_LINE ${MAKEFILE_AM}
  )
  set(SRCS ${${VAR}})
  foreach(FILES ${FILES_PER_LINE})
-    string(FIND ${FILES} "=" OFFSET)
-    math(EXPR OFFSET "${OFFSET} + 2")
-    string(SUBSTRING ${FILES} ${OFFSET} -1 FILES)
-    if(FILES)
-      string(REGEX MATCHALL "[0-9a-z\\._]+"
-        FILES ${FILES}
-      )
-      foreach(FILE ${FILES})
-        list(APPEND SRCS ${FOLDER}/${FILE})
-      endforeach()
-    endif()
+    string(SUBSTRING ${FILES} 12 -1 FILES)
+    string(REGEX MATCHALL "[0-9a-z\\._]+"
+      FILES ${FILES}
+    )
+    foreach(FILE ${FILES})
+      list(APPEND SRCS ${FOLDER}/${FILE})
+    endforeach()
  endforeach()
  set(${VAR} ${SRCS} PARENT_SCOPE)
 endfunction()

-set(WEBP_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
-parse_Makefile_am(${WEBP_SRC_DIR}/dec "WEBP_DEC_SRCS" "")
-parse_Makefile_am(${WEBP_SRC_DIR}/demux "WEBP_DEMUX_SRCS" "")
-parse_Makefile_am(${WEBP_SRC_DIR}/dsp "WEBP_DSP_COMMON_SRCS" "COMMON")
-parse_Makefile_am(${WEBP_SRC_DIR}/dsp "WEBP_DSP_ENC_SRCS" "ENC")
-parse_Makefile_am(${WEBP_SRC_DIR}/dsp "WEBP_DSP_ENC_SRCS" "dsp_[^ ]*")
-parse_Makefile_am(${WEBP_SRC_DIR}/dsp "WEBP_DSP_DEC_SRCS" "decode_[^ ]*")
-parse_Makefile_am(${WEBP_SRC_DIR}/enc "WEBP_ENC_SRCS" "")
-parse_Makefile_am(${WEBP_SRC_DIR}/utils "WEBP_UTILS_COMMON_SRCS" "COMMON")
-parse_Makefile_am(${WEBP_SRC_DIR}/utils "WEBP_UTILS_ENC_SRCS" "ENC")
-parse_Makefile_am(${WEBP_SRC_DIR}/utils "WEBP_UTILS_DEC_SRCS" "decode_[^ ]*")
+set(WEBP_SRCS)
+parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/dec "WEBP_SRCS")
+parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/demux "WEBP_SRCS")
+parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/dsp "WEBP_SRCS")
+parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/enc "WEBP_SRCS")
+parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/utils "WEBP_SRCS")

 # Remove the files specific to SIMD we don't use.
 foreach(FILE ${WEBP_SIMD_FILES_NOT_TO_INCLUDE})
-  list(REMOVE_ITEM WEBP_DSP_ENC_SRCS ${FILE})
-  list(REMOVE_ITEM WEBP_DSP_DEC_SRCS ${FILE})
+  list(REMOVE_ITEM WEBP_SRCS ${FILE})
 endforeach()

-### Define the mandatory libraries.
-# Build the webpdecoder library.
-if(MSVC)
-  # avoid security warnings for e.g., fopen() used in the examples.
-  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
-else()
-  add_definitions(-Wall)
-endif()
-include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${WEBP_DEP_INCLUDE_DIRS})
-add_library(webpdecode OBJECT ${WEBP_DEC_SRCS})
-add_library(webpdspdecode OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS})
-add_library(webputilsdecode OBJECT ${WEBP_UTILS_COMMON_SRCS}
-  ${WEBP_UTILS_DEC_SRCS})
-add_library(webpdecoder $<TARGET_OBJECTS:webpdecode>
-  $<TARGET_OBJECTS:webpdspdecode> $<TARGET_OBJECTS:webputilsdecode>)
-target_link_libraries(webpdecoder ${WEBP_DEP_LIBRARIES})
-
-# Build the webp library.
-add_library(webpencode OBJECT ${WEBP_ENC_SRCS})
-target_include_directories(
-  webpencode PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
-                     ${CMAKE_CURRENT_SOURCE_DIR}/src)
-add_library(webpdsp OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS}
-                           ${WEBP_DSP_ENC_SRCS})
-target_include_directories(webpdsp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
-                                           ${CMAKE_CURRENT_SOURCE_DIR})
-add_library(webputils OBJECT ${WEBP_UTILS_COMMON_SRCS} ${WEBP_UTILS_DEC_SRCS}
-                             ${WEBP_UTILS_ENC_SRCS})
-target_include_directories(webputils PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
-                                             ${CMAKE_CURRENT_SOURCE_DIR})
-add_library(webp $<TARGET_OBJECTS:webpdecode> $<TARGET_OBJECTS:webpdsp>
-                 $<TARGET_OBJECTS:webpencode> $<TARGET_OBJECTS:webputils>)
-if(XCODE)
-  libwebp_add_stub_file(webp)
-endif()
+# Build the library.
+add_definitions(-Wall)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ ${WEBP_DEP_INCLUDE_DIRS})
+add_library(webp ${WEBP_SRCS})
 target_link_libraries(webp ${WEBP_DEP_LIBRARIES})
-target_include_directories(
-  webp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
-  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
-         $<INSTALL_INTERFACE:include>)
-set_target_properties(
-  webp
-  PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/encode.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")

-# Make sure the OBJECT libraries are built with position independent code
-# (it is not ON by default).
-set_target_properties(webpdecode webpdspdecode webputilsdecode
-  webpencode webpdsp webputils PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
-# Build the webp demux library.
-add_library(webpdemux ${WEBP_DEMUX_SRCS})
-target_link_libraries(webpdemux webp)
-target_include_directories(
-  webpdemux PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
-  PUBLIC $<INSTALL_INTERFACE:include>)
-set_target_properties(
-  webpdemux
-  PROPERTIES
-    PUBLIC_HEADER
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/demux.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")
-
-# Set the version numbers.
-function(parse_version FILE NAME VAR)
-  file(READ ${CMAKE_CURRENT_SOURCE_DIR}/src/${FILE} SOURCE_FILE)
-  string(REGEX MATCH "${NAME}_la_LDFLAGS[^\n]* -version-info [0-9:]+" TMP
-    ${SOURCE_FILE})
-  string(REGEX MATCH "[0-9:]+" TMP ${TMP})
-  string(REGEX REPLACE ":" "." VERSION ${TMP})
-  set(${VAR} "${VERSION}" PARENT_SCOPE)
-endfunction()
-parse_version(Makefile.am webp WEBP_WEBP_SOVERSION)
-set_target_properties(webp PROPERTIES VERSION ${PACKAGE_VERSION}
-  SOVERSION ${WEBP_WEBP_SOVERSION})
-parse_version(Makefile.am webpdecoder WEBP_DECODER_SOVERSION)
-set_target_properties(webpdecoder PROPERTIES VERSION ${PACKAGE_VERSION}
-  SOVERSION ${WEBP_DECODER_SOVERSION})
-parse_version(demux/Makefile.am webpdemux WEBP_DEMUX_SOVERSION)
-set_target_properties(webpdemux PROPERTIES VERSION ${PACKAGE_VERSION}
-  SOVERSION ${WEBP_DEMUX_SOVERSION})
-
-# Define the libraries to install.
-set(INSTALLED_LIBRARIES webpdecoder webp webpdemux)
-
-### Deal with SIMD.
 # Change the compile flags for SIMD files we use.
 list(LENGTH WEBP_SIMD_FILES_TO_INCLUDE WEBP_SIMD_FILES_TO_INCLUDE_LENGTH)
 math(EXPR WEBP_SIMD_FILES_TO_INCLUDE_RANGE
@@ -207,201 +97,93 @@ endforeach()

 # Build the executables if asked for.
 if(WEBP_BUILD_CWEBP OR WEBP_BUILD_DWEBP OR
-   WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP OR WEBP_BUILD_WEBP_JS)
+   WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
  # Example utility library.
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "EXAMPLEUTIL_SRCS"
-    "example_util_[^ ]*")
-  list(APPEND EXAMPLEUTIL_SRCS
-    ${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
-  add_library(exampleutil ${EXAMPLEUTIL_SRCS})
+  set(exampleutil_SRCS
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/example_util.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/example_util.h)
+  add_library(exampleutil ${exampleutil_SRCS})
+  target_link_libraries(exampleutil webp ${WEBP_DEP_LIBRARIES})

-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/imageio "IMAGEIOUTILS_SRCS"
-    "imageio_util_[^ ]*")
-  add_library(imageioutil ${IMAGEIOUTILS_SRCS})
-  target_link_libraries(imageioutil webp)
+  set(imageioutil_SRCS
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/imageio_util.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/imageio_util.h)
+  add_library(imageioutil ${imageioutil_SRCS})
+  target_link_libraries(imageioutil ${WEBP_DEP_LIBRARIES})

  # Image-decoding utility library.
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/imageio "IMAGEDEC_SRCS"
-    "imagedec_[^ ]*")
-  add_library(imagedec ${IMAGEDEC_SRCS})
-  target_link_libraries(imagedec imageioutil webp ${WEBP_DEP_IMG_LIBRARIES})
+  set(imagedec_SRCS
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/gifdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/gifdec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/image_dec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/image_dec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/jpegdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/jpegdec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/metadata.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/metadata.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/pngdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/pngdec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/tiffdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/tiffdec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/webpdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/webpdec.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/wicdec.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/wicdec.h)
+  add_library(imagedec ${imagedec_SRCS})
+  target_link_libraries(imagedec webp ${WEBP_DEP_LIBRARIES}
+    ${WEBP_DEP_IMG_LIBRARIES})

  # Image-encoding utility library.
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/imageio "IMAGEENC_SRCS"
-    "imageenc_[^ ]*")
-  add_library(imageenc ${IMAGEENC_SRCS})
-  target_link_libraries(imageenc webp)
-
-  set_property(TARGET exampleutil imageioutil imagedec imageenc
-    PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
+  set(imageenc_SRCS
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/image_enc.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/imageio/image_enc.h)
+  add_library(imageenc ${imageenc_SRCS})
+  target_link_libraries(imageenc webp imageioutil
+    ${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES})
 endif()

 if(WEBP_BUILD_DWEBP)
  # dwebp
  include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "DWEBP_SRCS"
-    "dwebp")
-  add_executable(dwebp ${DWEBP_SRCS})
-  target_link_libraries(dwebp exampleutil imagedec imageenc webpdecoder)
-  install(TARGETS dwebp RUNTIME DESTINATION bin)
-  set_property(TARGET dwebp PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
+  add_executable(dwebp
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/dwebp.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
+  target_link_libraries(dwebp imagedec imageenc webp
+    exampleutil imageioutil
+    ${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES}
+  )
 endif()

 if(WEBP_BUILD_CWEBP)
  # cwebp
  include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "CWEBP_SRCS"
-    "cwebp")
-  add_executable(cwebp ${CWEBP_SRCS})
-  target_link_libraries(cwebp exampleutil imagedec webp)
-  install(TARGETS cwebp RUNTIME DESTINATION bin)
-  set_property(TARGET cwebp PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
-endif()
-
-if(WEBP_BUILD_GIF2WEBP AND NOT GIF_FOUND)
-  unset(WEBP_BUILD_GIF2WEBP CACHE)
-endif()
-
-if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/mux "WEBP_MUX_SRCS"
-    "")
-  add_library(webpmux ${WEBP_MUX_SRCS})
-  target_link_libraries(webpmux webp)
-  parse_version(mux/Makefile.am webpmux WEBP_MUX_SOVERSION)
-  set_target_properties(webpmux PROPERTIES VERSION ${PACKAGE_VERSION}
-    SOVERSION ${WEBP_MUX_SOVERSION})
-  set_target_properties(
-    webpmux
-    PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h;\
-${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h;")
-  list(APPEND INSTALLED_LIBRARIES webpmux)
+  add_executable(cwebp
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/cwebp.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
+  target_link_libraries(cwebp imagedec webp exampleutil imageioutil
+    ${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES}
+  )
 endif()

 if(WEBP_BUILD_GIF2WEBP)
  # gif2webp
-  include_directories(${WEBP_DEP_GIF_INCLUDE_DIRS})
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "GIF2WEBP_SRCS"
-    "gif2webp")
+  include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
+  set(GIF2WEBP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/examples/gif2webp.c)
+  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/mux "GIF2WEBP_SRCS")
  add_executable(gif2webp ${GIF2WEBP_SRCS})
-  target_link_libraries(gif2webp exampleutil imageioutil webp webpmux
-    ${WEBP_DEP_GIF_LIBRARIES})
-  install(TARGETS gif2webp RUNTIME DESTINATION bin)
-  set_property(TARGET gif2webp PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
+  target_link_libraries(gif2webp imagedec webp exampleutil imageioutil
+    ${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES}
+  )
 endif()

 if(WEBP_BUILD_IMG2WEBP)
  # img2webp
  include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "IMG2WEBP_SRCS"
-    "img2webp")
+  set(IMG2WEBP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/examples/img2webp.c)
+  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/mux "IMG2WEBP_SRCS")
  add_executable(img2webp ${IMG2WEBP_SRCS})
-  target_link_libraries(img2webp exampleutil imagedec imageioutil webp webpmux)
-  install(TARGETS img2webp RUNTIME DESTINATION bin)
-  set_property(TARGET img2webp PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
+  target_link_libraries(img2webp imagedec webp exampleutil imageioutil
+    ${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES}
+  )
 endif()
-
-if (WEBP_BUILD_WEBPINFO)
-  # webpinfo
-  include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
-  parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/examples "WEBPINFO_SRCS"
-    "webpinfo")
-  add_executable(webpinfo ${WEBPINFO_SRCS})
-  target_link_libraries(webpinfo exampleutil imageioutil)
-  install(TARGETS webpinfo RUNTIME DESTINATION bin)
-  set_property(TARGET webpinfo PROPERTY INCLUDE_DIRECTORIES
-    ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
-endif()
-
-if(WEBP_BUILD_WEBP_JS)
-  # JavaScript version
-  add_executable(webp_js
-                 ${CMAKE_CURRENT_SOURCE_DIR}/extras/webp_to_sdl.c)
-  target_link_libraries(webp_js webpdecoder SDL)
-  set(WEBP_HAVE_SDL 1)
-  set_target_properties(webp_js PROPERTIES LINK_FLAGS
-      "-s EXPORTED_FUNCTIONS='[\"_WebpToSDL\"]' -s INVOKE_RUN=0")
-  set_target_properties(webp_js PROPERTIES OUTPUT_NAME webp)
-  target_compile_definitions(webp_js PUBLIC EMSCRIPTEN WEBP_HAVE_SDL)
-
-  # WASM version
-  add_executable(webp_wasm
-                 ${CMAKE_CURRENT_SOURCE_DIR}/extras/webp_to_sdl.c)
-  target_link_libraries(webp_wasm webpdecoder SDL)
-  set_target_properties(webp_wasm PROPERTIES LINK_FLAGS
-      "-s WASM=1 -s 'BINARYEN_METHOD=\"native-wasm\"' \
-      -s EXPORTED_FUNCTIONS='[\"_WebpToSDL\"]' -s INVOKE_RUN=0")
-  target_compile_definitions(webp_wasm PUBLIC EMSCRIPTEN WEBP_HAVE_SDL)
-
-  target_compile_definitions(webpdecoder PUBLIC EMSCRIPTEN)
-endif()
-
-# Generate the config.h file.
-configure_file(${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in
-  ${CMAKE_CURRENT_BINARY_DIR}/src/webp/config.h)
-add_definitions(-DHAVE_CONFIG_H)
-# The webp folder is included as we reference config.h as
-# ../webp/config.h or webp/config.h
-include_directories(${CMAKE_CURRENT_BINARY_DIR})
-
-# Install the different headers and libraries.
-install(
-  TARGETS ${INSTALLED_LIBRARIES}
-  EXPORT WebPTargets
-  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/webp
-  INCLUDES
-  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-set(ConfigPackageLocation ${CMAKE_INSTALL_DATADIR}/WebP/cmake/)
-install(EXPORT WebPTargets NAMESPACE WebP::
-        DESTINATION ${ConfigPackageLocation})
-
-# Create the CMake version file.
-include(CMakePackageConfigHelpers)
-write_basic_package_version_file(
-  "${CMAKE_CURRENT_BINARY_DIR}/WebPConfigVersion.cmake"
-  VERSION ${PACKAGE_VERSION}
-  COMPATIBILITY AnyNewerVersion
-)
-
-# Create the Config file.
-include(CMakePackageConfigHelpers)
-set(ConfigPackageLocation share/WebP/cmake/)
-configure_package_config_file(
-  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/WebPConfig.cmake.in
-  ${CMAKE_CURRENT_BINARY_DIR}/WebPConfig.cmake
-  INSTALL_DESTINATION ${ConfigPackageLocation}
-  PATH_VARS CMAKE_INSTALL_INCLUDEDIR)
-
-# Install the generated CMake files.
-install(
-  FILES "${CMAKE_CURRENT_BINARY_DIR}/WebPConfigVersion.cmake"
-        "${CMAKE_CURRENT_BINARY_DIR}/WebPConfig.cmake"
-  DESTINATION ${ConfigPackageLocation}
-)
-
-# Install the man pages.
-set(MAN_PAGES cwebp.1 dwebp.1 gif2webp.1 img2webp.1 vwebp.1 webpmux.1
-  webpinfo.1)
-set(EXEC_BUILDS "CWEBP" "DWEBP" "GIF2WEBP" "IMG2WEBP" "VWEBP" "WEBPMUX"
-  "WEBPINFO")
-list(LENGTH MAN_PAGES MAN_PAGES_LENGTH)
-math(EXPR MAN_PAGES_RANGE "${MAN_PAGES_LENGTH} - 1")
-
-foreach(I_MAN RANGE ${MAN_PAGES_RANGE})
-  list(GET EXEC_BUILDS ${I_MAN} EXEC_BUILD)
-  if(WEBP_BUILD_${EXEC_BUILD})
-    list(GET MAN_PAGES ${I_MAN} MAN_PAGE)
-    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man/${MAN_PAGE}
-      DESTINATION ${CMAKE_INSTALL_PREFIX}/share/man/man1
-      COMPONENT doc
-    )
-  endif()
-endforeach()
--- a/285
+++ b/285
@@ -1,294 +1,9 @@
-f66955de WEBP_REDUCE_CSP: restrict colorspace support
-a289d8e7 update ChangeLog (tag: v0.6.1-rc2)
-c10a493c vwebp: disable double buffering on windows & mac
-0d4466c2 webp_to_sdl.c: fix file mode
-1b27bf8b WEBP_REDUCE_SIZE: disable all rescaler code
-126be109 webpinfo: add -version option
-9add62b5 bump version to 0.6.1
-d3e26144 update NEWS
-2edda639 README: add webpinfo section
-9ca568ef Merge "right-size some tables"
-31f1995c Merge "SSE2 implementation of HasAlphaXXX"
-a80c46bd SSE2 implementation of HasAlphaXXX
-083507f2 right-size some tables
-2e5785b2 anim_utils.c: remove warning when !defined(WEBP_HAVE_GIF)
-b299c47e add WEBP_REDUCE_SIZE
-f593d71a enc: disable pic->stats/extra_info w/WEBP_DISABLE_STATS
-541179a9 Merge "predictor_enc: fix build w/--disable-near-lossless"
-5755a7ec predictor_enc: fix build w/--disable-near-lossless
-eab5bab7 add WEBP_DISABLE_STATS
-8052c585 remove some petty TODOs from vwebp.
-c245343d move LOAD8x4 and STORE8x2 closer to their use location
-b9e734fd dec,cosmetics: normalize function naming style
-c188d546 dec: harmonize function suffixes
-28c5ac81 dec_sse41: harmonize function suffixes
-e65b72a3 Merge "introduce WebPHasAlpha8b and WebPHasAlpha32b"
-b94cee98 dec_sse2: remove HE8uv_SSE2
-44a0ee3f introduce WebPHasAlpha8b and WebPHasAlpha32b
-aebf59ac Merge "WebPPictureAllocARGB: align argb allocation"
-c184665e WebPPictureAllocARGB: align argb allocation
-3daf7509 WebPParseHeaders: remove obsolete animation TODO
-80285d97 cmake: avoid security warnings under msvc
-650eac55 cmake: don't set -Wall with MSVC
-c462cd00 Remove useless code.
-01a98217 Merge "remove WebPWorkerImpl declaration from the header"
-3c49fc47 Merge "thread_utils: fix potentially bad call to Execute"
-fde2782e thread_utils: fix potentially bad call to Execute
-2a270c1d remove WebPWorkerImpl declaration from the header
-f1f437cc remove mention of 'lossy-only parameters' from the doc
-3879074d Merge "WebPMemToUint32: remove ptr cast to int"
-04b029d2 WebPMemToUint32: remove ptr cast to int
-b7971d0e dsp: avoid defining _C functions w/NEON builds
-6ba98764 webpdec: correct alloc size check w/use_argb
-5cfb3b0f normalize include guards
-f433205e Merge changes Ia17c7dfc,I75423abb,Ia2f716b4,I161caa14,I4210081a, ...
-8d033b14 {dec,enc}_neon: harmonize function suffixes x2
-0295e981 upsampling_neon: harmonize function suffixes
-d572c4e5 yuv_neon: harmonize function suffixes
-ab9c2500 rescaler_neon: harmonize function suffixes
-93e0ce27 lossless_neon: harmonize function suffixes
-22fbc50e lossless_enc_neon: harmonize function suffixes
-447875b4 filters_neon,cosmetics: fix indent
-e51bdd43 remove unused VP8TokenToStats() function
-785da7ea enc_neon: harmonize function suffixes
-bc1a251f dec_neon: harmonize function suffixes
-61e535f1 dsp/lossless: workaround gcc-4.8 bug on arm
-68b2eab7 cwebp: fix alpha reporting w/lossless & metadata
-30042faa WebPDemuxGetI: add doc details around WebPFormatFeature
-0a17f471 Merge "WIP: list includes as descendants of the project dir"
-a4399721 WIP: list includes as descendants of the project dir
-08275708 Merge "Make sure we reach the full range for alpha blending."
-d361a6a7 yuv_sse2: harmonize function suffixes
-6921aa6f upsampling_sse2: harmonize function suffixes
-08c67d3e ssim_sse2: harmonize function suffixes
-582a1b57 rescaler_sse2: harmonize function suffixes
-2c1b18ba lossless_sse2: harmonize function suffixes
-0ac46e81 lossless_enc_sse2: harmonize function suffixes
-bc634d57 enc_sse2: harmonize function suffixes
-bcb7347c dec_sse2: harmonize function suffixes
-e14ad93c Make sure we reach the full range for alpha blending.
-7038ca8d demux,StoreFrame: restore hdr size check to min req
-fb3daad6 cpu: fix ssse3 check
-be590e06 Merge "Fix CMake redefinition for HAVE_CPU_FEATURES_H"
-35f736e1 Fix CMake redefinition for HAVE_CPU_FEATURES_H
-a5216efc Fix integer overflow warning.
-a9c8916b decode.h,WebPIDecGetRGB: clarify output ptr validity
-3c74c645 gif2webp: handle 1-frame case properly + fix anim_diff
-c7f295d3 Merge "gif2webp: introduce -loop_compatibility option"
-b4e04677 gif2webp: introduce -loop_compatibility option
-f78da3de add LOCAL_CLANG_PREREQ and avoid WORK_AROUND_GCC w/3.8+
-01c426f1 define WEBP_USE_INTRINSICS w/gcc-4.9+
-8635973d use sdl-config (if available) to determine the link flags
-e9459382 use CPPFLAGS before CFLAGS
-4a9d788e Merge "Android.mk,mips: fix clang build with r15"
-4fbdc9fb Android.mk,mips: fix clang build with r15
-a80fcc4a ifdef code not used by Chrome/Android.
-3993af12 Fix signed integer overflows.
-f66f94ef anim_dump: small tool to dump frames from animated WebP
-6eba857b Merge "rationalize the Makefile.am"
-c5e34fba function definition cleanup
-3822762a rationalize the Makefile.am
-501ef6e4 configure style fix: animdiff -> anim_diff
-f8bdc268 Merge "protect against NULL dump_folder[] value in ReadAnimatedImage()"
-23bfc652 protect against NULL dump_folder[] value in ReadAnimatedImage()
-8dc3d71b cosmetics,ReadAnimatedWebP: correct function comment
-5bd40066 Merge changes I66a64a0a,I4d2e520f
-7945575c cosmetics,webpinfo: remove an else after a return
-8729fa11 cosmetics,cwebp: remove an else after a return
-f324b7f9 cosmetics: normalize fn proto & decl param names
-869eb369 CMake cleanups.
-289e62a3 Remove declaration of unimplemented VP8ApplyNearLosslessPredict
-20a94186 pnmdec,PAM: validate depth before calculating bytes_per_px
-34130afe anim_encode: fix integer overflow
-42c79aa6 Merge "Encoder: harmonize function suffixes"
-b09307dc Encoder: harmonize function suffixes
-bed0456d Merge "SSIM: harmonize the function suffix"
-54f6a3cf lossless_sse2.c: fix some missed suffix changes
-088f1dcc SSIM: harmonize the function suffix
-86fc4dd9 webpdec: use ImgIoUtilCheckSizeArgumentsOverflow
-08ea9ecd imageio: add ability restrict max image size
-6f9daa4a jpegdec,ReadError: fix leaks on error
-a0f72a4f VP8LTransformColorFunc: drop an non-respected 'const' from the signature.
-8c934902 Merge "Lossess dec: harmonize the function suffixes"
-622242aa Lossess dec: harmonize the function suffixes
-1411f027 Lossless Enc: harmonize the function suffixes
-24ad2e3c add const to two variables
-46efe062 Merge "Allow the lossless cruncher to work for alpha."
-8c3f9a47 Speed-up LZ77.
-1aef4c71 Allow the lossless cruncher to work for alpha.
-b8821dbd Improve the box LZ77 speed.
-7beed280 add missing ()s to macro parameters
-6473d20b Merge "fix Android standalone toolchain build"
-dcefed95 Merge "build.gradle: fix arm64 build"
-0c83a8bc Merge "yuv: harmonize suffix naming"
-c6d1db4b fix Android standalone toolchain build
-663a6d9d unify the ALTERNATE_CODE flag usage
-73ea9f27 yuv: harmonize suffix naming
-c71b68ac build.gradle: fix arm64 build
-c4568b47 Rescaler: harmonize the suffix naming
-6cb13b05 Merge "alpha_processing: harmonize the naming suffixes to be _C()"
-83a3e69a Merge "simplify WEBP_EXTERN macro"
-7295fde2 Merge "filters: harmonize the suffixes naming to _SSE2(), _C(), etc."
-8e42ba4c simplify WEBP_EXTERN macro
-331ab34b cost*.c: harmonize the suffix namings
-b161f670 filters: harmonize the suffixes naming to _SSE2(), _C(), etc.
-dec5e4d3 alpha_processing: harmonize the naming suffixes to be _C()
-6878d427 fix memory leak in SDL_Init()
-461ae555 Merge "configure: fix warnings in sdl check"
-62486a22 configure: test for -Wundef
-92982609 dsp.h: fix -Wundef w/__mips_dsp_rev
-0265cede configure: fix warnings in sdl check
-88c73d8a backward_references_enc.h: fix WINDOW_SIZE_BITS check
-4ea49f6b rescaler_sse2.c: fix WEBP_RESCALER_FIX -> _RFIX typo
-1b526638 Clean-up some CMake
-87f57a4b Merge "cmake: fix gif lib detection when cross compiling"
-b34a9db1 cosmetics,dec_sse2: remove some redundant comments
-471c5755 cmake: fix gif lib detection when cross compiling
-c793417a cmake: disable gif2webp if gif lib isn't found
-dcbc1c88 cmake: split gif detection from IMG deps
-66ad84f0 Merge "muxread: remove unreachable code"
-50ec3ab7 muxread: remove unreachable code
-7d67a164 Lossy encoding: smoothen transparent areas to improve compression
-e50650c7 Merge "fix signature for DISABLE_TOKEN_BUFFER compilation"
-671d2567 fix signature for DISABLE_TOKEN_BUFFER compilation
-d6755580 cpu.cmake: use unique flag to test simd disable flags
-28914528 Merge "Remove the argb* files."
-8acb4942 Remove the argb* files.
-3b62347b README: correct cmake invocation note
-7ca0df13 Have the SSE2 version of PackARGB use common code.
-7b250459 Merge "Re-use the transformed image when trying several LZ77 in lossless."
-e132072f Re-use the transformed image when trying several LZ77 in lossless.
-5d7a50ef Get code to compile in C++.
-7b012987 configure: test for -Wparentheses-equality
-f0569adb Fix man pages for multi-threading.
-f1d5a397 multithread cruncher: only copy stats when picture->stats != NULL
-f8c2ac15 Multi-thread the lossless cruncher.
-a88c6522 Merge "Integrate a new LZ77 looking for matches in the neighborhood of a pixel only."
-8f6df1d0 Unroll Predictors 10, 11 and 12.
-355c3d1b Integrate a new LZ77 looking for matches in the neighborhood of a pixel only.
-a1779a01 Refactor LZ77 handling in preparation for a new method.
-67de68b5 Android.mk/build.gradle: fix mips build with clang from r14b
-f209a548 Use the plane code and not the distance when computing statistics.
-b903b80c Split cost-based backward references in its own file.
-498cad34 Cosmetic changes in backward reference.
-e4eb4587 lossless, VP8LTransformColor_C: make sure no overflow happens with colors.
-af6deaff webpinfo: handle alpha flag mismatch
-7caef29b Fix typo that creeped in.
-39e19f92 Merge "near lossless: fix unsigned int overflow warnings."
-9bbc0891 near lossless: fix unsigned int overflow warnings.
-e1118d62 Merge "cosmetics,FindClosestDiscretized: use uint in mask creation"
-186bc9b7 Merge "webpinfo: tolerate ALPH+VP8L"
-b5887297 cosmetics,FindClosestDiscretized: use uint in mask creation
-f1784aee near_lossless,FindClosestDiscretized: use unsigned ops
-0d20abb3 webpinfo: tolerate ALPH+VP8L
-972104b3 webpmux: tolerate false positive Alpha flag
-dd7e83cc tiffdec,ReadTIFF: ensure data_size is < tsize_t max
-d988eb7b tiffdec,MyRead: quiet -Wshorten-64-to-32 warning
-dabda707 webpinfo: add support to parse Alpha bitstream
-4c117643 webpinfo: correct background color output, BGRA->ARGB
-defc98d7 Doc: clarify the role of quality in WebPConfig.
-d78ff780 Merge "Fix code to compile with C++."
-c8f14093 Fix code to compile with C++.
-497dc6a7 pnmdec: sanitize invalid header output
-d78e5867 Merge "configure: test for -Wconstant-conversion"
-481e91eb Merge "pnmdec,PAM: set bytes_per_px based on depth when missing"
-93b12753 configure: test for -Wconstant-conversion
-645f0c53 pnmdec,PAM: set bytes_per_px based on depth when missing
-e9154605 Merge "vwebp: activate GLUT double-buffering"
-818d795b vwebp: activate GLUT double-buffering
-d63e6f4b Add a man page for webpinfo
-4d708435 Merge "NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV"
-faf42213 NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV
-b4d576fa Install man pages with CMake.
-cbc1b921 webpinfo: add features to parse bitstream header
-e644c556 Fix bad bit writer initialization.
-b62cdad2 Merge "Implement a cruncher for lossless at method 6."
-da3e4dfb use the exact constant for the gamma transfer function
-a9c701e0 Merge "tiffdec: fix EXTRASAMPLES check"
-adab8ce0 Implement a cruncher for lossless at method 6.
-1b92b237 Merge "Fix VP8ApplyNearLossless to respect const and stride."
-1923ff02 tiffdec: fix EXTRASAMPLES check
-97cce5ba tiffdec: only request EXTRASAMPLES w/> 3 samples/px
-0dcd85b6 Fix VP8ApplyNearLossless to respect const and stride.
-f7682189 yuv: rationalize the C/SSE2 function naming
-52245424 NEON implementation of some Sharp-YUV420 functions
-690efd82 Avoid several backward reference copies.
-4bb1f607 src/dec/vp8_dec.h, cosmetics: fix comments
-285748be cmake: build/install webpinfo
-78fd199c backward_references_enc.c: clear -Wshadow warnings
-ae836410 WebPLog2FloorC: clear -Wshadow warning
-d0b7404e Merge "WASM support"
-134e314f WASM support
-c08adb6f Merge "VP8LEnc: remove use of BitsLog2Ceiling()"
-28c37ebd VP8LEnc: remove use of BitsLog2Ceiling()
-2cb58ab2 webpinfo: output format as a human readable string
-bb175a93 Merge "rename some symbols clashing with MSVC headers"
-39eda658 Remove a duplicated pixel hash implementation.
-36b8274d rename some symbols clashing with MSVC headers
-274daf54 Add webpinfo tool.
-ec5036e4 add explicit reference to /usr/local/{lib,inc}
-18f0dfac Merge "fix TIFF encoder regarding rgbA/RGBA"
-4e2b0b50 Merge "webpdec.h: fix a doc typo"
-e2eeabff Merge "Install binaries, libraries and headers in CMake."
-836607e6 webpdec.h: fix a doc typo
-9273e441 fix TIFF encoder regarding rgbA/RGBA
-17e3c11f Add limited PAM decoding support
-5f624871 Install binaries, libraries and headers in CMake.
-976adac1 Merge "lossless incremental decoding: fix missing eos_ test"
-f8fad4fa lossless incremental decoding: fix missing eos_ test
-27415d41 Merge "vwebp_sdl: fix the makefile.unix"
-49566182 Merge "ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode"
-6f75a51b Analyze the transform entropy on the whole image.
-a5e4e3af Use palette only if we can in entropy analysis.
-75a9c3c4 Improve compression by better entropy analysis.
-39cf6f4f vwebp_sdl: fix the makefile.unix
-699b0416 ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode
-7d985bd1 Fix small entropy analysis bug.
-6e7caf06 Optimize the color cache size.
-833c9219 More efficient stochastic histogram merge.
-5183326b Refactor the greedy histogram merge.
-99f6f462 Merge "histogram_enc.c,MyRand: s/ul/u/ for unsigned constants"
-80a22186 ssim.c: remove dead include
-a128dfff histogram_enc.c,MyRand: s/ul/u/ for unsigned constants
-693bf74e move the SSIM calculation code in ssim.c / ssim_sse2.c
-10d791ca Merge "Fix the random generator in HistogramCombineStochastic."
-fa63a966 Fix the random generator in HistogramCombineStochastic.
-16be192f VP8LSetBitPos: remove the eos_ setting
-027151ca don't erase the surface before blitting.
-4105d565 disable WEBP_USE_XXX optimisations when EMSCRIPTEN is defined
-9ee32a75 Merge "WebP-JS: emscripten-based Javascript decoder"
-ca9f7b7d WebP-JS: emscripten-based Javascript decoder
-868aa690 Perform greedy histogram merge in a unified way.
-5b393f2d Merge "fix path typo for vwebp_sdl in Makefile.vc"
-e0012bea CMake: only use libwebpdecoder for building dwebp
-84c2a7b0 fix path typo for vwebp_sdl in Makefile.vc
-1b0e4abf Merge "Add a flag to disable SIMD optimizations."
-32263250 Add a flag to disable SIMD optimizations.
-b494fdec optimize the ARGB->ARGB Import to use memcpy
-f1536039 Merge "ReadWebP: decode directly into a pre-allocated buffer"
-e69ed291 ReadWebP: decode directly into a pre-allocated buffer
-57d8de8a Merge "vwebp_sdl: simple viewer based on SDL"
-5cfd4ebc LZ77 interval speedups. Faster, smaller, simpler.
-1e7ad88b PNM header decoder: add some basic numerical validation
-17c7890c Merge "Add a decoder only library for WebP in CMake."
-be733786 Merge "Add clang build fix for MSA"
-03cda0e4 Add a decoder only library for WebP in CMake.
-aa893914 Add clang build fix for MSA
-31a92e97 Merge "imageio: add limited PNM support for reading"
-dcf9d82a imageio: add limited PNM support for reading
-6524fcd6 vwebp_sdl: simple viewer based on SDL
-6cf24a24 get_disto: fix reference file read
-43d472aa Merge tag 'v0.6.0'
-50d1a848 update ChangeLog (tag: v0.6.0, origin/0.6.0, 0.6.0)
 20a7fea0 extras/Makefile.am: fix libwebpextras.la reference
 415f3ffe update ChangeLog (tag: v0.6.0-rc3)
 3c6d1224 update NEWS
 ee4a4141 update AUTHORS
 32ed856f Fix "all|no frames are keyframes" settings.
-1c3190b6 Merge "Fix "all|no frames are keyframes" settings."
 f4dc56fd disable GradientUnfilter_NEON
-4f3e3bbd disable GradientUnfilter_NEON
-2dc0bdca Fix "all|no frames are keyframes" settings.
 0d8e0588 img2webp: treat -loop as a no-op w/single images
 b0450139 ReadImage(): restore size reporting
 0ad3b4ef update ChangeLog (tag: v0.6.0-rc2)
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -29,7 +29,7 @@ PLATFORM_LDFLAGS = /SAFESEH
 NOLOGO     = /nologo
 CCNODBG    = cl.exe $(NOLOGO) /O2 /DNDEBUG
 CCDEBUG    = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
-CFLAGS     = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
+CFLAGS     = /Isrc $(NOLOGO) /W3 /EHsc /c
 CFLAGS     = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
 LDFLAGS    = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
 LDFLAGS    = $(LDFLAGS) $(PLATFORM_LDFLAGS)
@@ -155,7 +155,6 @@ CFGSET = TRUE
 !MESSAGE - all                            - build (de)mux-based targets for CFG
 !MESSAGE - gif2webp                       - requires libgif & >= VS2013
 !MESSAGE - anim_diff                      - requires libgif & >= VS2013
-!MESSAGE - anim_dump
 !MESSAGE
 !MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
 !MESSAGE   'legacy' will produce a Windows 2000 compatible library.
@@ -230,10 +229,12 @@ DSP_DEC_OBJS = \
    $(DIROBJ)\dsp\yuv.obj \
    $(DIROBJ)\dsp\yuv_mips32.obj \
    $(DIROBJ)\dsp\yuv_mips_dsp_r2.obj \
-    $(DIROBJ)\dsp\yuv_neon.obj \
    $(DIROBJ)\dsp\yuv_sse2.obj \

 DSP_ENC_OBJS = \
+    $(DIROBJ)\dsp\argb.obj \
+    $(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
+    $(DIROBJ)\dsp\argb_sse2.obj \
    $(DIROBJ)\dsp\cost.obj \
    $(DIROBJ)\dsp\cost_mips32.obj \
    $(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
@@ -253,8 +254,6 @@ DSP_ENC_OBJS = \
    $(DIROBJ)\dsp\lossless_enc_neon.obj \
    $(DIROBJ)\dsp\lossless_enc_sse2.obj \
    $(DIROBJ)\dsp\lossless_enc_sse41.obj \
-    $(DIROBJ)\dsp\ssim.obj \
-    $(DIROBJ)\dsp\ssim_sse2.obj \

 EX_ANIM_UTIL_OBJS = \
    $(DIROBJ)\examples\anim_util.obj \
@@ -264,7 +263,6 @@ IMAGEIO_DEC_OBJS = \
    $(DIROBJ)\imageio\jpegdec.obj \
    $(DIROBJ)\imageio\metadata.obj \
    $(DIROBJ)\imageio\pngdec.obj \
-    $(DIROBJ)\imageio\pnmdec.obj \
    $(DIROBJ)\imageio\tiffdec.obj \
    $(DIROBJ)\imageio\webpdec.obj \
    $(DIROBJ)\imageio\wicdec.obj \
@@ -281,7 +279,6 @@ EX_UTIL_OBJS = \
 ENC_OBJS = \
    $(DIROBJ)\enc\alpha_enc.obj \
    $(DIROBJ)\enc\analysis_enc.obj \
-    $(DIROBJ)\enc\backward_references_cost_enc.obj \
    $(DIROBJ)\enc\backward_references_enc.obj \
    $(DIROBJ)\enc\config_enc.obj \
    $(DIROBJ)\enc\cost_enc.obj \
@@ -347,8 +344,7 @@ all: ex
 OUT_EXAMPLES = $(DIRBIN)\cwebp.exe $(DIRBIN)\dwebp.exe
 EXTRA_EXAMPLES = $(DIRBIN)\vwebp.exe $(DIRBIN)\webpmux.exe \
                 $(DIRBIN)\img2webp.exe $(DIRBIN)\get_disto.exe \
-                 $(DIRBIN)\webp_quality.exe $(DIRBIN)\vwebp_sdl.exe \
-                 $(DIRBIN)\webpinfo.exe
+                 $(DIRBIN)\webp_quality.exe

 ex: $(OUT_LIBS) $(OUT_EXAMPLES)
 all: ex $(EXTRA_EXAMPLES)
@@ -356,15 +352,10 @@ all: ex $(EXTRA_EXAMPLES)
 # C99 support which is only available from VS2013 onward.
 gif2webp: $(DIRBIN)\gif2webp.exe
 anim_diff: $(DIRBIN)\anim_diff.exe
-anim_dump: $(DIRBIN)\anim_dump.exe

 $(DIRBIN)\anim_diff.exe: $(DIROBJ)\examples\anim_diff.obj $(EX_ANIM_UTIL_OBJS)
 $(DIRBIN)\anim_diff.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
 $(DIRBIN)\anim_diff.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
-$(DIRBIN)\anim_dump.exe: $(DIROBJ)\examples\anim_dump.obj $(EX_ANIM_UTIL_OBJS)
-$(DIRBIN)\anim_dump.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
-$(DIRBIN)\anim_dump.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
-$(DIRBIN)\anim_dump.exe: $(IMAGEIO_ENC_OBJS)
 $(DIRBIN)\cwebp.exe: $(DIROBJ)\examples\cwebp.obj $(IMAGEIO_DEC_OBJS)
 $(DIRBIN)\cwebp.exe: $(IMAGEIO_UTIL_OBJS)
 $(DIRBIN)\dwebp.exe: $(DIROBJ)\examples\dwebp.obj $(IMAGEIO_DEC_OBJS)
@@ -375,9 +366,6 @@ $(DIRBIN)\gif2webp.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBPMUX)
 $(DIRBIN)\gif2webp.exe: $(LIBWEBP)
 $(DIRBIN)\vwebp.exe: $(DIROBJ)\examples\vwebp.obj $(EX_UTIL_OBJS)
 $(DIRBIN)\vwebp.exe: $(IMAGEIO_UTIL_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
-$(DIRBIN)\vwebp_sdl.exe: $(DIROBJ)\extras\vwebp_sdl.obj
-$(DIRBIN)\vwebp_sdl.exe: $(DIROBJ)\extras\webp_to_sdl.obj
-$(DIRBIN)\vwebp_sdl.exe: $(IMAGEIO_UTIL_OBJS) $(LIBWEBP)
 $(DIRBIN)\webpmux.exe: $(DIROBJ)\examples\webpmux.obj $(LIBWEBPMUX)
 $(DIRBIN)\webpmux.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBP)
 $(DIRBIN)\img2webp.exe: $(DIROBJ)\examples\img2webp.obj $(LIBWEBPMUX)
@@ -385,12 +373,10 @@ $(DIRBIN)\img2webp.exe: $(IMAGEIO_DEC_OBJS)
 $(DIRBIN)\img2webp.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBP)
 $(DIRBIN)\get_disto.exe: $(DIROBJ)\extras\get_disto.obj
 $(DIRBIN)\get_disto.exe: $(IMAGEIO_DEC_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBP)
+
 $(DIRBIN)\webp_quality.exe: $(DIROBJ)\extras\webp_quality.obj
 $(DIRBIN)\webp_quality.exe: $(IMAGEIO_UTIL_OBJS)
 $(DIRBIN)\webp_quality.exe: $(EXTRAS_OBJS) $(LIBWEBP)
-$(DIRBIN)\webpinfo.exe: $(DIROBJ)\examples\webpinfo.obj
-$(DIRBIN)\webpinfo.exe: $(IMAGEIO_DEC_OBJS)
-$(DIRBIN)\webpinfo.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBP)

 $(OUT_EXAMPLES): $(EX_UTIL_OBJS) $(LIBWEBP)
 $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS): $(OUTPUT_DIRS)
@@ -447,7 +433,7 @@ $(OUTPUT_DIRS):
 $(DIROBJ)\$(DLLINC):
 	@echo #ifndef WEBP_DLL_H_ > $@
 	@echo #define WEBP_DLL_H_ >> $@
-	@echo #define WEBP_EXTERN __declspec(dllexport) >> $@
+	@echo #define WEBP_EXTERN(type) __declspec(dllexport) type >> $@
 	@echo #endif  /* WEBP_DLL_H_ */ >> $@

 .SUFFIXES: .c .obj .res .exe
@@ -459,9 +445,6 @@ $(DIROBJ)\dsp\enc_avx2.obj: src\dsp\enc_avx2.c
 $(DIROBJ)\examples\anim_diff.obj: examples\anim_diff.c
 	$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
 	  /Fo$(DIROBJ)\examples\ examples\$(@B).c
-$(DIROBJ)\examples\anim_dump.obj: examples\anim_dump.c
-	$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
-	  /Fo$(DIROBJ)\examples\ examples\$(@B).c
 $(DIROBJ)\examples\anim_util.obj: examples\anim_util.c
 	$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
 	  /Fo$(DIROBJ)\examples\ examples\$(@B).c
--- a/13
+++ b/13
@@ -1,16 +1,3 @@
- 11/24/2017: version 0.6.1
-  This is a binary compatible release.
-  * lossless performance and compression improvements + a new 'cruncher' mode
-    (-m 6 -q 100)
-  * ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
-  * webp-js: emscripten/webassembly based javascript decoder
-  * miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
-    #363)
-  Tool updates / additions:
-    added webpinfo - prints file format information (issue #330)
-    gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
-               '-loop_compatibility' can be used for the old behavior
-
 - 1/26/2017: version 0.6.0
  * lossless performance and compression improvements
  * miscellaneous performance improvements (SSE2, NEON, MSA)
--- a/36
+++ b/36
@@ -4,7 +4,7 @@
          \__\__/\____/\_____/__/ ____  ___
                / _/ /    \    \ /  _ \/ _/
               /  \_/   / /   \ \   __/  \__
-               \____/____/\_____/_____/____/v0.6.1
+               \____/____/\_____/_____/____/v0.6.0

 Description:
 ============
@@ -113,8 +113,8 @@ make install

 CMake:
 ------
-With CMake, you can compile libwebp, cwebp, dwebp, gif2web, img2webp, webpinfo
-and the JS bindings.
+The support for CMake is minimal: it only helps you compile libwebp, cwebp and
+dwebp.

 Prerequisites:
 A compiler (e.g., gcc with autotools) and CMake.
@@ -123,25 +123,18 @@ minimal build:
 $ sudo apt-get install build-essential cmake

 When building from git sources, you will need to run cmake to generate the
-makefiles.
+configure script.

 mkdir build && cd build && cmake ../
 make
 make install

-If you also want any of the executables, you will need to enable them through
-CMake, e.g.:
+If you also want cwebp or dwebp, you will need to enable them through CMake:

 cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../

 or through your favorite interface (like ccmake or cmake-qt-gui).

-Finally, once installed, you can also use WebP in your CMake project by doing:
-
-find_package(WebP)
-
-which will define the CMake variables WebP_INCLUDE_DIRS and WebP_LIBRARIES.
-
 Gradle:
 -------
 The support for Gradle is minimal: it only helps you compile libwebp, cwebp and
@@ -367,23 +360,6 @@ Use following options to convert into alternate image formats:
  -quiet ....... quiet mode, don't print anything
  -noasm ....... disable all assembly optimizations

-WebP file analysis tool:
-========================
-
-'webpinfo' can be used to print out the chunk level structure and bitstream
-header information of WebP files. It can also check if the files are of valid
-WebP format.
-
-Usage: webpinfo [options] in_files
-Note: there could be multiple input files;
-      options must come before input files.
-Options:
-  -version ........... Print version number and exit.
-  -quiet ............. Do not show chunk parsing information.
-  -diag .............. Show parsing error diagnosis.
-  -summary ........... Show chunk stats summary.
-  -bitstream_info .... Parse bitstream header.
-
 Visualization tool:
 ===================

@@ -494,8 +470,6 @@ Options:
  -metadata <string> ..... comma separated list of metadata to
                           copy from the input to the output if present
                           Valid values: all, none, icc, xmp (default)
-  -loop_compatibility .... use compatibility mode for Chrome
-                           version prior to M62 (inclusive)
  -mt .................... use multi-threading if available

  -version ............... print version number and exit
--- a/README.mux
+++ b/README.mux
@@ -1,7 +1,7 @@
          __   __  ____  ____  ____  __ __  _     __ __
         /  \\/  \/  _ \/  _ \/  _ \/  \  \/ \___/_ / _\
         \       /   __/  _  \   __/      /  /  (_/  /__
-          \__\__/\_____/_____/__/  \__//_/\_____/__/___/v0.4.1
+          \__\__/\_____/_____/__/  \__//_/\_____/__/___/v0.4.0


 Description:
--- a/README.webp_js
+++ b/README.webp_js
@@ -1,75 +0,0 @@
-     __   __ ____ ____ ____     __  ____
-    /  \\/  \  _ \  _ \  _ \   (__)/  __\
-    \       /  __/ _  \  __/   _)  \_   \
-     \__\__/_____/____/_/     /____/____/
-
-Description:
-============
-
-This file describes the compilation of libwebp into a JavaScript decoder
-using Emscripten and CMake.
-
- - install the Emscripten SDK following the procedure described at:
-   https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html
-   After installation, you should have some global variable positioned to the
-   location of the SDK. In particular, $EMSCRIPTEN should point to the
-   top-level directory containing Emscripten tools.
-
- - make sure the file $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake is
-   accessible. This is the toolchain file used by CMake to invoke Emscripten.
-
- - configure the project 'WEBP_JS' with CMake using:
-
- cd webp_js && \
- cmake -DWEBP_BUILD_WEBP_JS=ON \
-       -DEMSCRIPTEN_GENERATE_BITCODE_STATIC_LIBRARIES=1 \
-       -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
-       ../
-
- - compile webp.js using 'make'.
-
- - that's it! Upon completion, you should have the webp.js and
-   webp.js.mem files generated.
-
-The callable JavaScript function is WebPToSDL(), which decodes a raw WebP
-bitstream into a canvas. See webp_js/index.html for a simple usage sample.
-
-Demo HTML page:
-===============
-
-   The HTML page webp_js/index.html requires an HTTP server to serve the WebP
-   image example. It's easy to just use Python for that.
-
-cd webp_js && python -m SimpleHTTPServer 8080
-
-and then navigate to http://localhost:8080 in your favorite browser.
-
-
-Web-Assembly (WASM) version:
-============================
-
-  CMakeLists.txt is configured to build the WASM version when using
-  the option WEBP_BUILD_WEBP_JS=ON. The compilation step will assemble
-  the files 'webp_wasm.js', 'webp_wasm.wasm' in the webp_js/ directory.
-  See webp_js/index_wasm.html for a simple demo page using the WASM version
-  of the library.
-
-  You will need a fairly recent version of Emscripten (at least 1.37.8) and of
-  your WASM-enabled browser to run this version. Consider it very experimental!
-
-Caveat:
-=======
-
-  - First decoding using the library is usually slower, due to just-in-time
-    compilation.
-
-  - Some versions of llvm produce the following compile error when SSE2 is
-    enabled.
-
-"Unsupported:   %516 = bitcast <8 x i16> %481 to i128
- LLVM ERROR: BitCast Instruction not yet supported for integer types larger than 64 bits"
-
-    The corresponding Emscripten bug is at:
-    https://github.com/kripken/emscripten/issues/3788
-
-    Therefore, SSE2 optimization is currently disabled in CMakeLists.txt.
--- a/build.gradle
+++ b/build.gradle
@@ -74,22 +74,12 @@ model {
          cCompiler.args "-frename-registers -s"
        }
      }
-      // mips32 fails to build with clang from r14b
-      // https://bugs.chromium.org/p/webp/issues/detail?id=343
-      if (toolChain in Clang) {
-        if (getTargetPlatform() == "mips") {
-          cCompiler.args "-no-integrated-as"
-        }
-      }
      // Check for NEON usage.
-      if (getTargetPlatform() == "arm") {
+      if (getTargetPlatform() == "arm" || getTargetPlatform() == "arm64") {
        NEON = "c.neon"
-        cCompiler.define "HAVE_CPU_FEATURES_H"
      } else {
        NEON = "c"
      }
-
-      cCompiler.args "-I" + file(".").absolutePath
    }
    // Link to pthread for shared libraries.
    withType(SharedLibraryBinarySpec) {
@@ -122,6 +112,9 @@ model {
            include "alpha_processing_neon.$NEON"
            include "alpha_processing_sse2.c"
            include "alpha_processing_sse41.c"
+            include "argb.c"
+            include "argb_mips_dsp_r2.c"
+            include "argb_sse2.c"
            include "cpu.c"
            include "dec.c"
            include "dec_clip_tables.c"
@@ -155,7 +148,6 @@ model {
            include "yuv.c"
            include "yuv_mips32.c"
            include "yuv_mips_dsp_r2.c"
-            include "yuv_neon.$NEON"
            include "yuv_sse2.c"
            srcDir "src/utils"
            include "bit_reader_utils.c"
@@ -187,12 +179,9 @@ model {
            include "lossless_enc_neon.$NEON"
            include "lossless_enc_sse2.c"
            include "lossless_enc_sse41.c"
-            include "ssim.c"
-            include "ssim_sse2.c"
            srcDir "src/enc"
            include "alpha_enc.c"
            include "analysis_enc.c"
-            include "backward_references_cost_enc.c"
            include "backward_references_enc.c"
            include "config_enc.c"
            include "cost_enc.c"
@@ -299,7 +288,6 @@ model {
            include "jpegdec.c"
            include "metadata.c"
            include "pngdec.c"
-            include "pnmdec.c"
            include "tiffdec.c"
            include "webpdec.c"
          }
@@ -401,24 +389,6 @@ model {
        }
      }
    }
-
-    webpinfo_example(NativeExecutableSpec) {
-      binaries {
-        all {
-          lib library: "example_util", linkage: "static"
-          lib library: "imageio_util", linkage: "static"
-          lib library: "webp"
-        }
-      }
-      sources {
-        c {
-          source {
-            srcDir "./examples"
-            include "webpinfo.c"
-          }
-        }
-      }
-    }
  }
  tasks {
    // Task to test all possible configurations.
--- a/cmake/WebPConfig.cmake.in
+++ b/cmake/WebPConfig.cmake.in
@@ -1,19 +0,0 @@
-set(WebP_VERSION @PROJECT_VERSION@)
-set(WEBP_VERSION ${WebP_VERSION})
-
-@PACKAGE_INIT@
-
-if(@WEBP_USE_THREAD@)
-  include(CMakeFindDependencyMacro)
-  find_dependency(Threads REQUIRED)
-endif()
-
-include("${CMAKE_CURRENT_LIST_DIR}/WebPTargets.cmake")
-
-set_and_check(WebP_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
-set(WebP_INCLUDE_DIRS ${WebP_INCLUDE_DIR})
-set(WEBP_INCLUDE_DIRS ${WebP_INCLUDE_DIR})
-set(WebP_LIBRARIES "@INSTALLED_LIBRARIES@")
-set(WEBP_LIBRARIES "${WebP_LIBRARIES}")
-
-check_required_components(WebP)
--- a/cmake/config.h.cmake
+++ b/cmake/config.h.cmake
@@ -65,49 +65,14 @@ endif()
 # Find the standard image libraries.
 set(WEBP_DEP_IMG_LIBRARIES)
 set(WEBP_DEP_IMG_INCLUDE_DIRS)
-foreach(I_LIB PNG JPEG TIFF)
+foreach(I_LIB PNG JPEG TIFF GIF)
  find_package(${I_LIB})
  set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
  if(${I_LIB}_FOUND)
    list(APPEND WEBP_DEP_IMG_LIBRARIES ${${I_LIB}_LIBRARIES})
-    list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS
-         ${${I_LIB}_INCLUDE_DIR} ${${I_LIB}_INCLUDE_DIRS})
+    list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS ${${I_LIB}_INCLUDE_DIRS})
  endif()
 endforeach()
-if(WEBP_DEP_IMG_INCLUDE_DIRS)
-  list(REMOVE_DUPLICATES WEBP_DEP_IMG_INCLUDE_DIRS)
-endif()
-
-# GIF detection, gifdec isn't part of the imageio lib.
-include(CMakePushCheckState)
-set(WEBP_DEP_GIF_LIBRARIES)
-set(WEBP_DEP_GIF_INCLUDE_DIRS)
-find_package(GIF)
-set(WEBP_HAVE_GIF ${GIF_FOUND})
-if(GIF_FOUND)
-  # GIF find_package only locates the header and library, it doesn't fail
-  # compile tests when detecting the version, but falls back to 3 (as of at
-  # least cmake 3.7.2). Make sure the library links to avoid incorrect
-  # detection when cross compiling.
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_LIBRARIES ${GIF_LIBRARIES})
-  set(CMAKE_REQUIRED_INCLUDES ${GIF_INCLUDE_DIR})
-  check_c_source_compiles("
-      #include <gif_lib.h>
-      int main(void) {
-        (void)DGifOpenFileHandle;
-        return 0;
-      }
-      " GIF_COMPILES
-  )
-  cmake_pop_check_state()
-  if(GIF_COMPILES)
-    list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
-    list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
-  else()
-    unset(GIF_FOUND)
-  endif()
-endif()

 ## Check for specific headers.
 include(CheckIncludeFiles)
@@ -164,3 +129,13 @@ strip_bracket(PACKAGE_URL)
 set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
 set(PACKAGE_TARNAME ${PACKAGE_NAME})
 set(VERSION ${PACKAGE_VERSION})
+
+## Generate the config.h header.
+configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in
+  ${CMAKE_CURRENT_BINARY_DIR}/include/webp/config.h)
+add_definitions(-DHAVE_CONFIG_H)
+# The webp folder is included as we reference config.h as
+# ../webp/config.h or webp/config.h
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/include
+  ${CMAKE_CURRENT_BINARY_DIR}/include/webp
+)
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -13,9 +13,6 @@
 /* Set to 1 if __builtin_bswap64 is available */
 #cmakedefine HAVE_BUILTIN_BSWAP64 1

-/* Define to 1 if you have the <cpu-features.h> header file. */
-#cmakedefine HAVE_CPU_FEATURES_H 1
-
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #cmakedefine HAVE_DLFCN_H 1

@@ -118,19 +115,9 @@
 /* Set to 1 if JPEG library is installed */
 #cmakedefine WEBP_HAVE_JPEG 1

-/* Set to 1 if NEON is supported */
-#cmakedefine WEBP_HAVE_NEON
-
-/* Set to 1 if runtime detection of NEON is enabled */
-/* TODO: handle properly in CMake */
-#cmakedefine WEBP_HAVE_NEON_RTCD
-
 /* Set to 1 if PNG library is installed */
 #cmakedefine WEBP_HAVE_PNG 1

-/* Set to 1 if SDL library is installed */
-#cmakedefine WEBP_HAVE_SDL 1
-
 /* Set to 1 if SSE2 is supported */
 #cmakedefine WEBP_HAVE_SSE2 1

@@ -140,9 +127,6 @@
 /* Set to 1 if TIFF library is installed */
 #cmakedefine WEBP_HAVE_TIFF 1

-/* Enable near lossless encoding */
-#cmakedefine WEBP_NEAR_LOSSLESS 1
-
 /* Undefine this to disable thread support. */
 #cmakedefine WEBP_USE_THREAD 1

--- a/cmake/cpu.cmake
+++ b/cmake/cpu.cmake
@@ -1,15 +1,7 @@
 ## Check for SIMD extensions.
-include(CMakePushCheckState)

-function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
-  if(NOT ENABLE_SIMD)
-    message(STATUS "Disabling ${WEBP_SIMD_FLAG} optimization.")
-    set(WEBP_HAVE_${WEBP_SIMD_FLAG} 0 PARENT_SCOPE)
-    return()
-  endif()
+function(webp_check_compiler_flag WEBP_SIMD_FLAG)
  unset(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} CACHE)
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR})
  check_c_source_compiles("
      #include \"${CMAKE_CURRENT_LIST_DIR}/../src/dsp/dsp.h\"
      int main(void) {
@@ -20,7 +12,6 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
      }
    " WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG}
  )
-  cmake_pop_check_state()
  if(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG})
    set(WEBP_HAVE_${WEBP_SIMD_FLAG} 1 PARENT_SCOPE)
  else()
@@ -64,13 +55,12 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
  # First try with no extra flag added as the compiler might have default flags
  # (especially on Android).
  unset(WEBP_HAVE_${WEBP_SIMD_FLAG} CACHE)
-  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS)
-  webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
+  webp_check_compiler_flag(${WEBP_SIMD_FLAG})
  if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
    list(GET SIMD_ENABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
    set(CMAKE_REQUIRED_FLAGS ${SIMD_COMPILE_FLAG})
-    webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
+    webp_check_compiler_flag(${WEBP_SIMD_FLAG})
  else()
    set(SIMD_COMPILE_FLAG " ")
  endif()
@@ -106,12 +96,11 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
            set(COMMON_PATTERNS)
          endif()
          set(CMAKE_REQUIRED_DEFINITIONS ${SIMD_COMPILE_FLAG})
-          check_c_source_compiles("int main(void) {return 0;}"
-            FLAG_${SIMD_COMPILE_FLAG}
+          check_c_source_compiles("int main(void) {return 0;}" FLAG2
            FAIL_REGEX "warning: argument unused during compilation:"
            ${COMMON_PATTERNS}
          )
-          if(NOT FLAG_${SIMD_COMPILE_FLAG})
+          if(NOT FLAG2)
            unset(HAS_COMPILE_FLAG CACHE)
          endif()
        endif()
@@ -121,5 +110,4 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
      endif()
    endif()
  endif()
-  cmake_pop_check_state()
 endforeach()
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([libwebp], [0.6.1],
+AC_INIT([libwebp], [0.6.0],
        [https://bugs.chromium.org/p/webp],,
        [http://developers.google.com/speed/webp])
 AC_CANONICAL_HOST
@@ -67,7 +67,6 @@ AC_DEFUN([TEST_AND_ADD_CFLAGS],
          CFLAGS="$SAVED_CFLAGS"])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-fvisibility=hidden])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wall])
-TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wconstant-conversion])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wdeclaration-after-statement])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wextra])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wfloat-conversion])
@@ -76,10 +75,8 @@ TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat -Wformat-security])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-declarations])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-prototypes])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
-TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wparentheses-equality])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshadow])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshorten-64-to-32])
-TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wundef])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunreachable-code])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused-but-set-variable])
 TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused])
@@ -244,13 +241,9 @@ AS_IF([test "x$enable_neon" != "xno"], [
          NEON_FLAGS=""],
          [AC_DEFINE(WEBP_HAVE_NEON_RTCD, [1],
                     [Set to 1 if runtime detection of NEON is enabled])])])
-
-      case "$host_os" in
-        *android*) AC_CHECK_HEADERS([cpu-features.h]) ;;
-      esac
-      ;;
-  esac
-  AC_SUBST([NEON_FLAGS])])
+        ;;
+    esac
+    AC_SUBST([NEON_FLAGS])])

 dnl === CLEAR_LIBVARS([var_pfx])
 dnl ===   Clears <var_pfx>_{INCLUDES,LIBS}.
@@ -435,44 +428,6 @@ AS_IF([test "x$enable_gl" != "xno"], [
 ])
 AM_CONDITIONAL([BUILD_VWEBP], [test "$build_vwebp" = "yes"])

-dnl === check for SDL support ===
-
-AC_ARG_ENABLE([sdl],
-              AS_HELP_STRING([--disable-sdl],
-                             [Disable detection of SDL support
-                              @<:@default=auto@:>@]))
-AS_IF([test "x$enable_sdl" != "xno"], [
-  CLEAR_LIBVARS([SDL])
-  WITHLIB_OPTION([sdl], [SDL])
-
-  sdl_header="no"
-  LIBCHECK_PROLOGUE([SDL])
-  AC_CHECK_HEADER([SDL/SDL.h], [sdl_header="SDL_SDL.h"],
-                  [AC_CHECK_HEADER([SDL.h], [sdl_header="SDL.h"],
-                  [AC_MSG_WARN(SDL library not available - no sdl.h)])])
-  if test x"$sdl_header" != "xno"; then
-    AC_CHECK_LIB(SDL, SDL_Init,
-                 [SDL_LIBS="-lSDL"
-                  SDL_INCLUDES="-DWEBP_HAVE_SDL"
-                  AC_DEFINE(WEBP_HAVE_SDL, [1],
-                            [Set to 1 if SDL library is installed])
-                  sdl_support=yes
-                 ],
-                 AC_MSG_WARN(Optional SDL library not found),
-                 [$MATH_LIBS])
-    if test x"$sdl_header" = "xSDL.h"; then
-      SDL_INCLUDES="$SDL_INCLUDES -DWEBP_HAVE_JUST_SDL_H"
-    fi
-  fi
-  LIBCHECK_EPILOGUE([SDL])
-
-  if test "$sdl_support" = "yes"; then
-    build_vwebp_sdl=yes
-  fi
-])
-
-AM_CONDITIONAL([BUILD_VWEBP_SDL], [test "$build_vwebp_sdl" = "yes"])
-
 dnl === check for PNG support ===

 AC_ARG_ENABLE([png], AS_HELP_STRING([--disable-png],
@@ -590,7 +545,7 @@ AS_IF([test "x$enable_gif" != "xno"], [

  if test "$gif_support" = "yes" -a \
          "$enable_libwebpdemux" = "yes"; then
-    build_anim_diff=yes
+    build_animdiff=yes
  fi

  if test "$gif_support" = "yes" -a \
@@ -598,7 +553,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
    build_gif2webp=yes
  fi
 ])
-AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_anim_diff}" = "yes"])
+AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_animdiff}" = "yes"])
 AM_CONDITIONAL([BUILD_GIF2WEBP], [test "${build_gif2webp}" = "yes"])

 if test "$enable_libwebpmux" = "yes"; then
@@ -606,11 +561,6 @@ if test "$enable_libwebpmux" = "yes"; then
 fi
 AM_CONDITIONAL([BUILD_IMG2WEBP], [test "${build_img2webp}" = "yes"])

-if test "$enable_libwebpmux" = "yes"; then
-  build_webpinfo=yes
-fi
-AM_CONDITIONAL([BUILD_WEBPINFO], [test "${build_webpinfo}" = "yes"])
-
 dnl === check for WIC support ===

 AC_ARG_ENABLE([wic],
@@ -663,7 +613,7 @@ if test "$enable_wic" = "yes"; then
 fi
 esac

-dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1
+dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP

 USE_SWAP_16BIT_CSP=""
 AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
@@ -671,7 +621,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
              AS_HELP_STRING([--enable-swap-16bit-csp],
                             [Enable byte swap for 16 bit colorspaces]))
 if test "$enable_swap_16bit_csp" = "yes"; then
-  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
+  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
 fi
 AC_MSG_RESULT(${enable_swap_16bit_csp-no})
 AC_SUBST(USE_SWAP_16BIT_CSP)
@@ -689,21 +639,6 @@ fi
 AC_MSG_RESULT(${enable_experimental-no})
 AC_SUBST(USE_EXPERIMENTAL_CODE)

-dnl === If --disable-near-lossless is defined, add -DWEBP_NEAR_LOSSLESS=0
-
-AC_DEFINE(WEBP_NEAR_LOSSLESS, [1], [Enable near lossless encoding])
-AC_MSG_CHECKING(if --disable-near-lossless option is specified)
-AC_ARG_ENABLE([near_lossless],
-              AS_HELP_STRING([--disable-near-lossless],
-                             [Disable near lossless encoding]),
-              [], [enable_near_lossless=yes])
-if test "$enable_near_lossless" = "no"; then
-  AC_DEFINE(WEBP_NEAR_LOSSLESS, [0], [Enable near lossless encoding])
-  AC_MSG_RESULT([yes])
-else
-  AC_MSG_RESULT([no])
-fi
-
 dnl === Check whether libwebpmux should be built
 AC_MSG_CHECKING(whether libwebpmux is to be built)
 AC_ARG_ENABLE([libwebpmux],
@@ -778,12 +713,9 @@ dwebp : yes
  PNG  : ${png_support-no}
  WIC  : ${wic_support-no}
 GIF support : ${gif_support-no}
-anim_diff   : ${build_anim_diff-no}
+anim_diff   : ${build_animdiff-no}
 gif2webp    : ${build_gif2webp-no}
 img2webp    : ${build_img2webp-no}
 webpmux     : ${enable_libwebpmux-no}
 vwebp       : ${build_vwebp-no}
-webpinfo    : ${build_webpinfo-no}
-SDL support : ${sdl_support-no}
-vwebp_sdl   : ${build_vwebp_sdl-no}
 ])
--- a/examples/Android.mk
+++ b/examples/Android.mk
@@ -80,19 +80,3 @@ LOCAL_STATIC_LIBRARIES := example_util imageio_util imagedec webpmux webp
 LOCAL_MODULE := img2webp_example

 include $(BUILD_EXECUTABLE)
-
-################################################################################
-# webpinfo
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
-    webpinfo.c \
-
-LOCAL_CFLAGS := $(WEBP_CFLAGS)
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
-LOCAL_STATIC_LIBRARIES := example_util imageio_util webp
-
-LOCAL_MODULE := webpinfo_example
-
-include $(BUILD_EXECUTABLE)
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src

 bin_PROGRAMS = dwebp cwebp
 if BUILD_ANIMDIFF
-  noinst_PROGRAMS = anim_diff anim_dump
+  noinst_PROGRAMS = anim_diff
 endif
 if BUILD_GIF2WEBP
  bin_PROGRAMS += gif2webp
@@ -16,9 +16,6 @@ endif
 if BUILD_VWEBP
  bin_PROGRAMS += vwebp
 endif
-if BUILD_WEBPINFO
-  bin_PROGRAMS += webpinfo
-endif

 noinst_LTLIBRARIES = libexample_util.la

@@ -27,36 +24,20 @@ libexample_util_la_LIBADD = ../src/libwebp.la

 anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
 anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
-anim_diff_LDADD  =
-anim_diff_LDADD += ../src/demux/libwebpdemux.la
-anim_diff_LDADD += libexample_util.la
-anim_diff_LDADD += ../imageio/libimageio_util.la
+anim_diff_LDADD  = ../src/demux/libwebpdemux.la
+anim_diff_LDADD += libexample_util.la ../imageio/libimageio_util.la
 anim_diff_LDADD += $(GIF_LIBS) -lm

-anim_dump_SOURCES = anim_dump.c anim_util.c anim_util.h
-anim_dump_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(PNG_INCLUDES)
-anim_dump_CPPFLAGS += $(GIF_INCLUDES)
-anim_dump_LDADD  =
-anim_dump_LDADD += ../src/demux/libwebpdemux.la
-anim_dump_LDADD += libexample_util.la
-anim_dump_LDADD += ../imageio/libimageio_util.la
-anim_dump_LDADD += ../imageio/libimageenc.la
-anim_dump_LDADD += $(PNG_LIBS) $(GIF_LIBS) $(TIFF_LIBS) -lm
-
 cwebp_SOURCES  = cwebp.c stopwatch.h
 cwebp_CPPFLAGS  = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
-cwebp_LDADD  =
-cwebp_LDADD += libexample_util.la
-cwebp_LDADD += ../imageio/libimageio_util.la
-cwebp_LDADD += ../imageio/libimagedec.la
-cwebp_LDADD += ../src/libwebp.la
+cwebp_LDADD  = libexample_util.la ../imageio/libimageio_util.la
+cwebp_LDADD += ../imageio/libimagedec.la ../src/libwebp.la
 cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)

 dwebp_SOURCES = dwebp.c stopwatch.h
 dwebp_CPPFLAGS  = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
 dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
-dwebp_LDADD  =
-dwebp_LDADD += libexample_util.la
+dwebp_LDADD  = libexample_util.la
 dwebp_LDADD += ../imageio/libimagedec.la
 dwebp_LDADD += ../imageio/libimageenc.la
 dwebp_LDADD += ../imageio/libimageio_util.la
@@ -65,52 +46,30 @@ dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)

 gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
 gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
-gif2webp_LDADD  =
-gif2webp_LDADD += libexample_util.la
-gif2webp_LDADD += ../imageio/libimageio_util.la
-gif2webp_LDADD += ../src/mux/libwebpmux.la
-gif2webp_LDADD += ../src/libwebp.la
-gif2webp_LDADD += $(GIF_LIBS)
+gif2webp_LDADD  = libexample_util.la ../imageio/libimageio_util.la
+gif2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la $(GIF_LIBS)

 vwebp_SOURCES = vwebp.c
 vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
-vwebp_LDADD  =
-vwebp_LDADD += libexample_util.la
-vwebp_LDADD += ../imageio/libimageio_util.la
-vwebp_LDADD += ../src/demux/libwebpdemux.la
-vwebp_LDADD += $(GL_LIBS)
+vwebp_LDADD  = libexample_util.la ../imageio/libimageio_util.la
+vwebp_LDADD += ../src/demux/libwebpdemux.la $(GL_LIBS)

 webpmux_SOURCES = webpmux.c
 webpmux_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
-webpmux_LDADD  =
-webpmux_LDADD += libexample_util.la
-webpmux_LDADD += ../imageio/libimageio_util.la
-webpmux_LDADD += ../src/mux/libwebpmux.la
-webpmux_LDADD += ../src/libwebp.la
+webpmux_LDADD  = libexample_util.la ../imageio/libimageio_util.la
+webpmux_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la

 img2webp_SOURCES = img2webp.c
 img2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
-img2webp_LDADD  =
-img2webp_LDADD += libexample_util.la
-img2webp_LDADD += ../imageio/libimageio_util.la
+img2webp_LDADD  = libexample_util.la ../imageio/libimageio_util.la
 img2webp_LDADD += ../imageio/libimagedec.la
-img2webp_LDADD += ../src/mux/libwebpmux.la
-img2webp_LDADD += ../src/libwebp.la
+img2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
 img2webp_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)

-webpinfo_SOURCES = webpinfo.c
-webpinfo_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
-webpinfo_LDADD  =
-webpinfo_LDADD += libexample_util.la
-webpinfo_LDADD += ../imageio/libimageio_util.la
-webpinfo_LDADD += ../src/libwebp.la
-
 if BUILD_LIBWEBPDECODER
  anim_diff_LDADD += ../src/libwebpdecoder.la
-  anim_dump_LDADD += ../src/libwebpdecoder.la
  vwebp_LDADD += ../src/libwebpdecoder.la
 else
  anim_diff_LDADD += ../src/libwebp.la
-  anim_dump_LDADD += ../src/libwebp.la
  vwebp_LDADD += ../src/libwebp.la
 endif
--- a/examples/anim_diff.c
+++ b/examples/anim_diff.c
@@ -143,18 +143,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
  if (!ok) return 0;  // These are fatal failures, can't proceed.

  if (is_multi_frame_image) {  // Checks relevant for multi-frame images only.
-    int max_loop_count_workaround = 0;
-    // Transcodes to webp increase the gif loop count by 1 for compatibility.
-    // When the gif has the maximum value the webp value will be off by one.
-    if ((img1->format == ANIM_GIF && img1->loop_count == 65536 &&
-         img2->format == ANIM_WEBP && img2->loop_count == 65535) ||
-        (img1->format == ANIM_WEBP && img1->loop_count == 65535 &&
-         img2->format == ANIM_GIF && img2->loop_count == 65536)) {
-      max_loop_count_workaround = 1;
-    }
-    ok = (max_loop_count_workaround ||
-          CompareValues(img1->loop_count, img2->loop_count,
-                        "Loop count mismatch")) && ok;
+    ok = CompareValues(img1->loop_count, img2->loop_count,
+                       "Loop count mismatch") && ok;
    ok = CompareBackgroundColor(img1->bgcolor, img2->bgcolor,
                                premultiply) && ok;
  }
--- a/examples/anim_dump.c
+++ b/examples/anim_dump.c
@@ -1,104 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Decodes an animated WebP file and dumps the decoded frames as PNG or TIFF.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include <stdio.h>
-#include <string.h>  // for 'strcmp'.
-
-#include "./anim_util.h"
-#include "webp/decode.h"
-#include "../imageio/image_enc.h"
-
-#if defined(_MSC_VER) && _MSC_VER < 1900
-#define snprintf _snprintf
-#endif
-
-static void Help(void) {
-  printf("Usage: anim_dump [options] files...\n");
-  printf("\nOptions:\n");
-  printf("  -folder <string> .... dump folder (default: '.')\n");
-  printf("  -prefix <string> .... prefix for dumped frames "
-                                  "(default: 'dump_')\n");
-  printf("  -tiff ............... save frames as TIFF\n");
-  printf("  -pam ................ save frames as PAM\n");
-}
-
-int main(int argc, const char* argv[]) {
-  int error = 0;
-  const char* dump_folder = ".";
-  const char* prefix = "dump_";
-  const char* suffix = "png";
-  WebPOutputFileFormat format = PNG;
-  int c;
-
-  if (argc < 2) {
-    Help();
-    return -1;
-  }
-
-  for (c = 1; !error && c < argc; ++c) {
-    if (!strcmp(argv[c], "-folder")) {
-      if (c + 1 == argc) {
-        fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
-        error = 1;
-        break;
-      }
-      dump_folder = argv[++c];
-    } else if (!strcmp(argv[c], "-prefix")) {
-      if (c + 1 == argc) {
-        fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
-        error = 1;
-        break;
-      }
-      prefix = argv[++c];
-    } else if (!strcmp(argv[c], "-tiff")) {
-      format = TIFF;
-      suffix = "tiff";
-    } else if (!strcmp(argv[c], "-pam")) {
-      format = PAM;
-      suffix = "pam";
-    } else {
-      uint32_t i;
-      AnimatedImage image;
-      const char* const file = argv[c];
-      memset(&image, 0, sizeof(image));
-      printf("Decoding file: %s as %s/%sxxxx.%s\n",
-             file, dump_folder, prefix, suffix);
-      if (!ReadAnimatedImage(file, &image, 0, NULL)) {
-        fprintf(stderr, "Error decoding file: %s\n Aborting.\n", file);
-        error = 1;
-        break;
-      }
-      for (i = 0; !error && i < image.num_frames; ++i) {
-        char out_file[1024];
-        WebPDecBuffer buffer;
-        WebPInitDecBuffer(&buffer);
-        buffer.colorspace = MODE_RGBA;
-        buffer.is_external_memory = 1;
-        buffer.width = image.canvas_width;
-        buffer.height = image.canvas_height;
-        buffer.u.RGBA.rgba = image.frames[i].rgba;
-        buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
-        buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
-        snprintf(out_file, sizeof(out_file), "%s/%s%.4d.%s",
-                 dump_folder, prefix, i, suffix);
-        if (!WebPSaveImage(&buffer, format, out_file)) {
-          fprintf(stderr, "Error while saving image '%s'\n", out_file);
-          error = 1;
-        }
-        WebPFreeDecBuffer(&buffer);
-      }
-      ClearAnimatedImage(&image);
-    }
-  }
-  return error ? 1 : 0;
-}
--- a/examples/anim_util.c
+++ b/examples/anim_util.c
@@ -16,7 +16,7 @@
 #include <stdio.h>
 #include <string.h>

-#if defined(WEBP_HAVE_GIF)
+#ifdef WEBP_HAVE_GIF
 #include <gif_lib.h>
 #endif
 #include "webp/format_constants.h"
@@ -33,13 +33,11 @@ static const int kNumChannels = 4;
 // -----------------------------------------------------------------------------
 // Common utilities.

-#if defined(WEBP_HAVE_GIF)
 // Returns true if the frame covers the full canvas.
 static int IsFullFrame(int width, int height,
                       int canvas_width, int canvas_height) {
  return (width == canvas_width && height == canvas_height);
 }
-#endif // WEBP_HAVE_GIF

 static int CheckSizeForOverflow(uint64_t size) {
  return (size == (size_t)size);
@@ -87,7 +85,6 @@ void ClearAnimatedImage(AnimatedImage* const image) {
  }
 }

-#if defined(WEBP_HAVE_GIF)
 // Clear the canvas to transparent.
 static void ZeroFillCanvas(uint8_t* rgba,
                           uint32_t canvas_width, uint32_t canvas_height) {
@@ -129,7 +126,6 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
    dst += stride;
  }
 }
-#endif // WEBP_HAVE_GIF

 // Canonicalize all transparent pixels to transparent black to aid comparison.
 static void CleanupTransparentPixels(uint32_t* rgba,
@@ -156,8 +152,6 @@ static int DumpFrame(const char filename[], const char dump_folder[],
  FILE* f = NULL;
  const char* row;

-  if (dump_folder == NULL) dump_folder = ".";
-
  base_name = strrchr(filename, '/');
  base_name = (base_name == NULL) ? filename : base_name + 1;
  max_len = strlen(dump_folder) + 1 + strlen(base_name)
@@ -206,7 +200,7 @@ static int IsWebP(const WebPData* const webp_data) {
  return (WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL) != 0);
 }

-// Read animated WebP bitstream 'webp_data' into 'AnimatedImage' struct.
+// Read animated WebP bitstream 'webp_data' into 'AnimatedImage' struct.
 static int ReadAnimatedWebP(const char filename[],
                            const WebPData* const webp_data,
                            AnimatedImage* const image, int dump_frames,
@@ -275,7 +269,6 @@ static int ReadAnimatedWebP(const char filename[],
    prev_frame_timestamp = timestamp;
  }
  ok = dump_ok;
-  if (ok) image->format = ANIM_WEBP;

 End:
  WebPAnimDecoderDelete(dec);
@@ -285,7 +278,7 @@ static int ReadAnimatedWebP(const char filename[],
 // -----------------------------------------------------------------------------
 // GIF Decoding.

-#if defined(WEBP_HAVE_GIF)
+#ifdef WEBP_HAVE_GIF

 // Returns true if this is a valid GIF bitstream.
 static int IsGIF(const WebPData* const data) {
@@ -430,11 +423,6 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
 }

 // Find appropriate app extension and get loop count from the next extension.
-// We use Chrome's interpretation of the 'loop_count' semantics:
-//   if not present -> loop once
-//   if present and loop_count == 0, return 0 ('infinite').
-//   if present and loop_count != 0, it's the number of *extra* loops
-//     so we need to return loop_count + 1 as total loop number.
 static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
  int i;
  for (i = 0; i < gif->ImageCount; ++i) {
@@ -452,13 +440,12 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
      if (signature_is_ok &&
          eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
          eb2->Bytes[0] == 1) {
-        const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
-                                    ((uint32_t)(eb2->Bytes[1]) << 0);
-        return (extra_loop > 0) ? extra_loop + 1 : 0;
+        return ((uint32_t)(eb2->Bytes[2]) << 8) +
+               ((uint32_t)(eb2->Bytes[1]) << 0);
      }
    }
  }
-  return 1;  // Default.
+  return 0;  // Default.
 }

 // Get duration of 'n'th frame in milliseconds.
@@ -685,7 +672,6 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
      }
    }
  }
-  image->format = ANIM_GIF;
  DGifCloseFile(gif, NULL);
  return 1;
 }
--- a/examples/anim_util.h
+++ b/examples/anim_util.h
@@ -22,11 +22,6 @@
 extern "C" {
 #endif

-typedef enum {
-  ANIM_GIF,
-  ANIM_WEBP
-} AnimatedFileFormat;
-
 typedef struct {
  uint8_t* rgba;         // Decoded and reconstructed full frame.
  int duration;          // Frame duration in milliseconds.
@@ -34,7 +29,6 @@ typedef struct {
 } DecodedFrame;

 typedef struct {
-  AnimatedFileFormat format;
  uint32_t canvas_width;
  uint32_t canvas_height;
  uint32_t bgcolor;
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -463,9 +463,8 @@ static int WriteWebPWithMetadata(FILE* const out,
    } else {
      const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
      if (is_lossless) {
-        // Presence of alpha is stored in the 37th bit (29th after the
-        // signature) of VP8L data.
-        if (webp[kChunkHeaderSize + 4] & (1 << 4)) flags |= kAlphaFlag;
+        // Alpha flag: 37th bit of VP8L data (29th after the signature byte).
+        if (webp[kChunkHeaderSize + 4] & (1 << 4)) flags |= kAlphaFlag;
      }
      ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
      ok = ok && WriteLE32(out, flags);
@@ -487,10 +486,10 @@ static int WriteWebPWithMetadata(FILE* const out,
      *metadata_written |= METADATA_XMP;
    }
    return ok;
+  } else {
+    // No metadata, just write the original image file.
+    return (fwrite(webp, webp_size, 1, out) == 1);
  }
-
-  // No metadata, just write the original image file.
-  return (fwrite(webp, webp_size, 1, out) == 1);
 }

 //------------------------------------------------------------------------------
--- a/examples/dwebp.c
+++ b/examples/dwebp.c
@@ -332,8 +332,9 @@ int main(int argc, const char *argv[]) {
      case BMP:
        output_buffer->colorspace = bitstream->has_alpha ? MODE_BGRA : MODE_BGR;
        break;
-      case TIFF:
-        output_buffer->colorspace = bitstream->has_alpha ? MODE_RGBA : MODE_RGB;
+      case TIFF:    // note: force pre-multiplied alpha
+        output_buffer->colorspace =
+            bitstream->has_alpha ? MODE_rgbA : MODE_RGB;
        break;
      case PGM:
      case RAW_YUV:
--- a/examples/gif2webp.c
+++ b/examples/gif2webp.c
@@ -72,10 +72,8 @@ static void Help(void) {
  printf("  -metadata <string> ..... comma separated list of metadata to\n");
  printf("                           ");
  printf("copy from the input to the output if present\n");
-  printf("                           ");
-  printf("Valid values: all, none, icc, xmp (default)\n");
-  printf("  -loop_compatibility .... use compatibility mode for Chrome\n");
-  printf("                           version prior to M62 (inclusive)\n");
+  printf("                           "
+         "Valid values: all, none, icc, xmp (default)\n");
  printf("  -mt .................... use multi-threading if available\n");
  printf("\n");
  printf("  -version ............... print version number and exit\n");
@@ -106,7 +104,7 @@ int main(int argc, const char *argv[]) {
  WebPAnimEncoderOptions enc_options;
  WebPConfig config;

-  int frame_number = 0;     // Whether we are processing the first frame.
+  int is_first_frame = 1;     // Whether we are processing the first frame.
  int done;
  int c;
  int quiet = 0;
@@ -117,9 +115,8 @@ int main(int argc, const char *argv[]) {
  int stored_icc = 0;         // Whether we have already stored an ICC profile.
  WebPData xmp_data;
  int stored_xmp = 0;         // Whether we have already stored an XMP profile.
-  int loop_count = 0;         // default: infinite
+  int loop_count = 0;
  int stored_loop_count = 0;  // Whether we have found an explicit loop count.
-  int loop_compatibility = 0;
  WebPMux* mux = NULL;

  int default_kmin = 1;  // Whether to use default kmin value.
@@ -154,8 +151,6 @@ int main(int argc, const char *argv[]) {
    } else if (!strcmp(argv[c], "-mixed")) {
      enc_options.allow_mixed = 1;
      config.lossless = 0;
-    } else if (!strcmp(argv[c], "-loop_compatibility")) {
-      loop_compatibility = 1;
    } else if (!strcmp(argv[c], "-q") && c < argc - 1) {
      config.quality = ExUtilGetFloat(argv[++c], &parse_error);
    } else if (!strcmp(argv[c], "-m") && c < argc - 1) {
@@ -282,7 +277,7 @@ int main(int argc, const char *argv[]) {

        if (!DGifGetImageDesc(gif)) goto End;

-        if (frame_number == 0) {
+        if (is_first_frame) {
          if (verbose) {
            printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
          }
@@ -324,6 +319,7 @@ int main(int argc, const char *argv[]) {
                    "a memory error.\n");
            goto End;
          }
+          is_first_frame = 0;
        }

        // Some even more broken GIF can have sub-rect with zero width/height.
@@ -340,11 +336,7 @@ int main(int argc, const char *argv[]) {
        GIFBlendFrames(&frame, &gif_rect, &curr_canvas);

        if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
-          fprintf(stderr, "Error while adding frame #%d: %s\n", frame_number,
-                  WebPAnimEncoderGetError(enc));
-          goto End;
-        } else {
-          ++frame_number;
+          fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
        }

        // Update canvases.
@@ -394,7 +386,7 @@ int main(int argc, const char *argv[]) {
              if (verbose) {
                fprintf(stderr, "Loop count: %d\n", loop_count);
              }
-              stored_loop_count = loop_compatibility ? (loop_count != 0) : 1;
+              stored_loop_count = (loop_count != 0);
            } else {  // An extension containing metadata.
              // We only store the first encountered chunk of each type, and
              // only if requested by the user.
@@ -451,23 +443,6 @@ int main(int argc, const char *argv[]) {
    goto End;
  }

-  if (!loop_compatibility) {
-    if (!stored_loop_count) {
-      // if no loop-count element is seen, the default is '1' (loop-once)
-      // and we need to signal it explicitly in WebP. Note however that
-      // in case there's a single frame, we still don't need to store it.
-      if (frame_number > 1) {
-        stored_loop_count = 1;
-        loop_count = 1;
-      }
-    } else if (loop_count > 0 && loop_count < 65535) {
-      // adapt GIF's semantic to WebP's (except in the infinite-loop case)
-      loop_count += 1;
-    }
-  }
-  // loop_count of 0 is the default (infinite), so no need to signal it
-  if (loop_count == 0) stored_loop_count = 0;
-
  if (stored_loop_count || stored_icc || stored_xmp) {
    // Re-mux to add loop count and/or metadata as needed.
    mux = WebPMuxCreate(&webp_data, 1);
--- a/examples/gifdec.c
+++ b/examples/gifdec.c
@@ -28,17 +28,11 @@
 #define GIF_DISPOSE_SHIFT     2

 // from utils/utils.h
-#ifdef __cplusplus
-extern "C" {
-#endif
 extern void WebPCopyPlane(const uint8_t* src, int src_stride,
                          uint8_t* dst, int dst_stride,
                          int width, int height);
 extern void WebPCopyPixels(const WebPPicture* const src,
                           WebPPicture* const dst);
-#ifdef __cplusplus
-}
-#endif

 void GIFGetBackgroundColor(const ColorMapObject* const color_map,
                           int bgcolor_index, int transparent_index,
--- a/examples/vwebp.c
+++ b/examples/vwebp.c
@@ -248,9 +248,9 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
      }
    }
  } else if (key == 'i') {
-    // Note: doesn't handle refresh of animation's last-frame (it's quite
-    // more involved to do, since you need to save the previous frame).
    kParams.print_info = 1 - kParams.print_info;
+    // TODO(skal): handle refresh of animation's last-frame too. It's quite
+    // more involved though (need to save the previous frame).
    if (!kParams.has_animation) ClearPreviousFrame();
    glutPostRedisplay();
  } else if (key == 'd') {
@@ -260,8 +260,8 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
 }

 static void HandleReshape(int width, int height) {
-  // Note: reshape doesn't preserve aspect ratio, and might
-  // be handling larger-than-screen pictures incorrectly.
+  // TODO(skal): should we preserve aspect ratio?
+  // Also: handle larger-than-screen pictures correctly.
  glViewport(0, 0, width, height);
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
@@ -378,23 +378,13 @@ static void HandleDisplay(void) {
    }
  }
  glPopMatrix();
-#if defined(__APPLE__) || defined(_WIN32)
  glFlush();
-#else
-  glutSwapBuffers();
-#endif
 }

 static void StartDisplay(void) {
  const int width = kParams.canvas_width;
  const int height = kParams.canvas_height;
-  // TODO(webp:365) GLUT_DOUBLE results in flickering / old frames to be
-  // partially displayed with animated webp + alpha.
-#if defined(__APPLE__) || defined(_WIN32)
  glutInitDisplayMode(GLUT_RGBA);
-#else
-  glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
-#endif
  glutInitWindowSize(width, height);
  glutCreateWindow("WebP viewer");
  glutDisplayFunc(HandleDisplay);
--- a/examples/webpinfo.c
+++ b/examples/webpinfo.c
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -1,4 +1,3 @@
-AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
 AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
 noinst_LTLIBRARIES = libwebpextras.la

@@ -12,30 +11,16 @@ libwebpextras_la_CPPFLAGS = $(AM_CPPFLAGS)
 libwebpextras_la_LDFLAGS = -lm
 libwebpextras_la_LIBADD = ../src/libwebp.la

-noinst_PROGRAMS =
-noinst_PROGRAMS += get_disto webp_quality
-if BUILD_VWEBP_SDL
-  noinst_PROGRAMS += vwebp_sdl
-endif
+noinst_PROGRAMS = get_disto webp_quality

-get_disto_SOURCES  = get_disto.c
-get_disto_CPPFLAGS = $(AM_CPPFLAGS)
-get_disto_LDADD =
-get_disto_LDADD += ../imageio/libimageio_util.la
-get_disto_LDADD += ../imageio/libimagedec.la
+get_disto_SOURCES = get_disto.c
+get_disto_CPPFLAGS  = $(AM_CPPFLAGS)
+get_disto_LDADD = ../imageio/libimageio_util.la ../imageio/libimagedec.la
 get_disto_LDADD += ../src/libwebp.la
 get_disto_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)

 webp_quality_SOURCES  = webp_quality.c
 webp_quality_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
-webp_quality_LDADD =
-webp_quality_LDADD += ../imageio/libimageio_util.la
+webp_quality_LDADD  = ../imageio/libimageio_util.la
 webp_quality_LDADD += libwebpextras.la
 webp_quality_LDADD += ../src/libwebp.la
-
-vwebp_sdl_SOURCES  = vwebp_sdl.c webp_to_sdl.c webp_to_sdl.h
-vwebp_sdl_CPPFLAGS = $(AM_CPPFLAGS) $(SDL_INCLUDES)
-vwebp_sdl_LDADD =
-vwebp_sdl_LDADD += ../imageio/libimageio_util.la
-vwebp_sdl_LDADD += ../src/libwebp.la
-vwebp_sdl_LDADD += $(SDL_LIBS)
--- a/extras/extras.c
+++ b/extras/extras.c
@@ -10,7 +10,7 @@
 //  Additional WebP utilities.
 //

-#include "extras/extras.h"
+#include "./extras.h"
 #include "webp/format_constants.h"

 #include <assert.h>
@@ -18,7 +18,7 @@

 #define XTRA_MAJ_VERSION 0
 #define XTRA_MIN_VERSION 1
-#define XTRA_REV_VERSION 1
+#define XTRA_REV_VERSION 0

 //------------------------------------------------------------------------------

--- a/extras/extras.h
+++ b/extras/extras.h
@@ -25,28 +25,28 @@ extern "C" {

 // Returns the version number of the extras library, packed in hexadecimal using
 // 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
-WEBP_EXTERN int WebPGetExtrasVersion(void);
+WEBP_EXTERN(int) WebPGetExtrasVersion(void);

 //------------------------------------------------------------------------------
 // Ad-hoc colorspace importers.

 // Import luma sample (gray scale image) into 'picture'. The 'picture'
 // width and height must be set prior to calling this function.
-WEBP_EXTERN int WebPImportGray(const uint8_t* gray, WebPPicture* picture);
+WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);

 // Import rgb sample in RGB565 packed format into 'picture'. The 'picture'
 // width and height must be set prior to calling this function.
-WEBP_EXTERN int WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
+WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);

 // Import rgb sample in RGB4444 packed format into 'picture'. The 'picture'
 // width and height must be set prior to calling this function.
-WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
+WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);

 // Import a color mapped image. The number of colors is less or equal to
 // MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
 // will be discarded. Returns 'false' in case of error, or if indexed[] contains
 // invalid indices.
-WEBP_EXTERN int
+WEBP_EXTERN(int)
 WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
                          const uint32_t palette[], int palette_size,
                          WebPPicture* pic);
@@ -59,7 +59,7 @@ WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
 // Otherwise (lossy bitstream), the returned value is in the range [0..100].
 // Any error (invalid bitstream, animated WebP, incomplete header, etc.)
 // will return a value of -1.
-WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
+WEBP_EXTERN(int) VP8EstimateQuality(const uint8_t* const data, size_t size);

 //------------------------------------------------------------------------------

--- a/extras/get_disto.c
+++ b/extras/get_disto.c
@@ -24,8 +24,8 @@
 #include <string.h>

 #include "webp/encode.h"
-#include "imageio/image_dec.h"
-#include "imageio/imageio_util.h"
+#include "../imageio/image_dec.h"
+#include "../imageio/imageio_util.h"

 static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
                          int keep_alpha) {
@@ -278,7 +278,7 @@ int main(int argc, const char *argv[]) {
    goto End;
  }
  size1 = ReadPicture(name1, &pic1, 1);
-  size2 = ReadPicture(name2, &pic2, 1);
+  size2 = ReadPicture(name2, &pic2, 1);
  if (size1 == 0 || size2 == 0) goto End;

  if (!keep_alpha) {
@@ -322,7 +322,6 @@ int main(int argc, const char *argv[]) {
      fprintf(stderr, "Can only compute the difference map in ARGB format.\n");
      goto End;
    }
-#if !defined(WEBP_REDUCE_CSP)
    data_size = WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb,
                                       pic1.width, pic1.height,
                                       pic1.argb_stride * 4,
@@ -334,12 +333,6 @@ int main(int argc, const char *argv[]) {
    ret = ImgIoUtilWriteFile(output, data, data_size) ? 0 : 1;
    WebPFree(data);
    if (ret) goto End;
-#else
-    (void)data;
-    (void)data_size;
-    fprintf(stderr, "Cannot save the difference map. Please recompile "
-                    "without the WEBP_REDUCE_CSP flag.\n");
-#endif  // WEBP_REDUCE_CSP
  }
  ret = 0;

--- a/extras/quality_estimate.c
+++ b/extras/quality_estimate.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "extras/extras.h"
+#include "./extras.h"
 #include "webp/decode.h"

 #include <math.h>
--- a/extras/vwebp_sdl.c
+++ b/extras/vwebp_sdl.c
@@ -1,96 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Simple SDL-based WebP file viewer.
-// Does not support animation, just static images.
-//
-// Press 'q' to exit.
-//
-// Author: James Zern (jzern@google.com)
-
-#include <stdio.h>
-
-#ifdef HAVE_CONFIG_H
-#include "webp/config.h"
-#endif
-
-#if defined(WEBP_HAVE_SDL)
-
-#include "webp_to_sdl.h"
-#include "webp/decode.h"
-#include "imageio/imageio_util.h"
-
-#if defined(WEBP_HAVE_JUST_SDL_H)
-#include <SDL.h>
-#else
-#include <SDL/SDL.h>
-#endif
-
-static void ProcessEvents(void) {
-  int done = 0;
-  SDL_Event event;
-  while (!done && SDL_WaitEvent(&event)) {
-    switch (event.type) {
-      case SDL_KEYUP:
-        switch (event.key.keysym.sym) {
-          case SDLK_q: done = 1; break;
-          default: break;
-        }
-        break;
-      default: break;
-    }
-  }
-}
-
-int main(int argc, char* argv[]) {
-  int c;
-  int ok = 0;
-  for (c = 1; c < argc; ++c) {
-    const char* file = NULL;
-    const uint8_t* webp = NULL;
-    size_t webp_size = 0;
-    if (!strcmp(argv[c], "-h")) {
-      printf("Usage: %s [-h] image.webp [more_files.webp...]\n", argv[0]);
-      return 0;
-    } else {
-      file = argv[c];
-    }
-    if (file == NULL) continue;
-    if (!ImgIoUtilReadFile(file, &webp, &webp_size)) {
-      fprintf(stderr, "Error opening file: %s\n", file);
-      goto Error;
-    }
-    if (webp_size != (size_t)(int)webp_size) {
-      fprintf(stderr, "File too large.\n");
-      goto Error;
-    }
-    ok = WebpToSDL((const char*)webp, (int)webp_size);
-    free((void*)webp);
-    if (!ok) {
-      fprintf(stderr, "Error decoding file %s\n", file);
-      goto Error;
-    }
-    ProcessEvents();
-  }
-  ok = 1;
-
- Error:
-  SDL_Quit();
-  return ok ? 0 : 1;
-}
-
-#else  // !WEBP_HAVE_SDL
-
-int main(int argc, const char *argv[]) {
-  fprintf(stderr, "SDL support not enabled in %s.\n", argv[0]);
-  (void)argc;
-  return 0;
-}
-
-#endif
--- a/extras/webp_quality.c
+++ b/extras/webp_quality.c
@@ -11,8 +11,8 @@
 #include <stdlib.h>
 #include <string.h>

-#include "extras/extras.h"
-#include "imageio/imageio_util.h"
+#include "./extras.h"
+#include "../imageio/imageio_util.h"

 int main(int argc, const char *argv[]) {
  int c;
--- a/extras/webp_to_sdl.c
+++ b/extras/webp_to_sdl.c
@@ -1,110 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//  Simple WebP-to-SDL wrapper. Useful for emscripten.
-//
-// Author: James Zern (jzern@google.com)
-
-#ifdef HAVE_CONFIG_H
-#include "webp/config.h"
-#endif
-
-#if defined(WEBP_HAVE_SDL)
-
-#include "webp_to_sdl.h"
-
-#include <stdio.h>
-#include "webp/decode.h"
-
-#if defined(WEBP_HAVE_JUST_SDL_H)
-#include <SDL.h>
-#else
-#include <SDL/SDL.h>
-#endif
-
-static int init_ok = 0;
-int WebpToSDL(const char* data, unsigned int data_size) {
-  int ok = 0;
-  VP8StatusCode status;
-  WebPDecoderConfig config;
-  WebPBitstreamFeatures* const input = &config.input;
-  WebPDecBuffer* const output = &config.output;
-  SDL_Surface* screen = NULL;
-  SDL_Surface* surface = NULL;
-
-  if (!WebPInitDecoderConfig(&config)) {
-    fprintf(stderr, "Library version mismatch!\n");
-    return 1;
-  }
-
-  if (!init_ok) {
-    SDL_Init(SDL_INIT_VIDEO);
-    init_ok = 1;
-  }
-
-  status = WebPGetFeatures((uint8_t*)data, (size_t)data_size, &config.input);
-  if (status != VP8_STATUS_OK) goto Error;
-
-  screen = SDL_SetVideoMode(input->width, input->height, 32, SDL_SWSURFACE);
-  if (screen == NULL) {
-    fprintf(stderr, "Unable to set video mode (32bpp %dx%d)!\n",
-            input->width, input->height);
-    goto Error;
-  }
-
-  surface = SDL_CreateRGBSurface(SDL_SWSURFACE,
-                                 input->width, input->height, 32,
-                                 0x000000ffu,   // R mask
-                                 0x0000ff00u,   // G mask
-                                 0x00ff0000u,   // B mask
-                                 0xff000000u);  // A mask
-
-  if (surface == NULL) {
-    fprintf(stderr, "Unable to create %dx%d RGBA surface!\n",
-            input->width, input->height);
-    goto Error;
-  }
-  if (SDL_MUSTLOCK(surface)) SDL_LockSurface(surface);
-
-#if SDL_BYTEORDER == SDL_BIG_ENDIAN
-  output->colorspace = MODE_BGRA;
-#else
-  output->colorspace = MODE_RGBA;
-#endif
-  output->width  = surface->w;
-  output->height = surface->h;
-  output->u.RGBA.rgba   = surface->pixels;
-  output->u.RGBA.stride = surface->pitch;
-  output->u.RGBA.size   = surface->pitch * surface->h;
-  output->is_external_memory = 1;
-
-  status = WebPDecode((const uint8_t*)data, (size_t)data_size, &config);
-  if (status != VP8_STATUS_OK) {
-    fprintf(stderr, "Error decoding image (%d)\n", status);
-    goto Error;
-  }
-
-  if (SDL_MUSTLOCK(surface)) SDL_UnlockSurface(surface);
-  if (SDL_BlitSurface(surface, NULL, screen, NULL) ||
-      SDL_Flip(screen)) {
-    goto Error;
-  }
-
-  ok = 1;
-
- Error:
-  SDL_FreeSurface(surface);
-  SDL_FreeSurface(screen);
-  WebPFreeDecBuffer(output);
-  return ok;
-}
-
-//------------------------------------------------------------------------------
-
-#endif  // WEBP_HAVE_SDL
--- a/extras/webp_to_sdl.h
+++ b/extras/webp_to_sdl.h
@@ -1,22 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//  Simple WebP-to-SDL wrapper. Useful for emscripten.
-//
-// Author: James Zern (jzern@google.com)
-
-#ifndef WEBP_EXTRAS_WEBP_TO_SDL_H_
-#define WEBP_EXTRAS_WEBP_TO_SDL_H_
-
-// Exports the method WebpToSDL(const char* data, int data_size) which decodes
-// a WebP bitstream into an RGBA SDL surface.
-// Return false on failure.
-extern int WebpToSDL(const char* data, unsigned int data_size);
-
-#endif  // WEBP_EXTRAS_WEBP_TO_SDL_H_
--- a/imageio/Android.mk
+++ b/imageio/Android.mk
@@ -25,7 +25,6 @@ LOCAL_SRC_FILES := \
    jpegdec.c \
    metadata.c \
    pngdec.c \
-    pnmdec.c \
    tiffdec.c \
    webpdec.c \

--- a/imageio/Makefile.am
+++ b/imageio/Makefile.am
@@ -1,29 +1,22 @@
 AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
-noinst_LTLIBRARIES =
-noinst_LTLIBRARIES += libimageio_util.la
-noinst_LTLIBRARIES += libimagedec.la
-noinst_LTLIBRARIES += libimageenc.la
+noinst_LTLIBRARIES = libimageio_util.la libimagedec.la libimageenc.la

 noinst_HEADERS =
 noinst_HEADERS += ../src/webp/decode.h
 noinst_HEADERS += ../src/webp/types.h

-libimageio_util_la_SOURCES =
-libimageio_util_la_SOURCES += imageio_util.c imageio_util.h
+libimageio_util_la_SOURCES = imageio_util.c imageio_util.h

-libimagedec_la_SOURCES  =
-libimagedec_la_SOURCES += image_dec.c image_dec.h
+libimagedec_la_SOURCES  = image_dec.c image_dec.h
 libimagedec_la_SOURCES += jpegdec.c jpegdec.h
 libimagedec_la_SOURCES += metadata.c metadata.h
 libimagedec_la_SOURCES += pngdec.c pngdec.h
-libimagedec_la_SOURCES += pnmdec.c pnmdec.h
 libimagedec_la_SOURCES += tiffdec.c tiffdec.h
 libimagedec_la_SOURCES += webpdec.c webpdec.h
 libimagedec_la_SOURCES += wicdec.c wicdec.h
 libimagedec_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
 libimagedec_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)

-libimageenc_la_SOURCES  =
-libimageenc_la_SOURCES += image_enc.c image_enc.h
+libimageenc_la_SOURCES  = image_enc.c image_enc.h
 libimageenc_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
 libimageenc_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
--- a/imageio/image_dec.c
+++ b/imageio/image_dec.c
@@ -29,10 +29,6 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
      format = WEBP_TIFF_FORMAT;
    } else if (magic1 == 0x52494646 && magic2 == 0x57454250) {
      format = WEBP_WEBP_FORMAT;
-    } else if (((magic1 >> 24) & 0xff) == 'P') {
-      const int type = (magic1 >> 16) & 0xff;
-      // we only support 'P5 -> P7' for now.
-      if (type >= '5' && type <= '7') format = WEBP_PNM_FORMAT;
    }
  }
  return format;
@@ -55,7 +51,6 @@ WebPImageReader WebPGetImageReader(WebPInputFileFormat format) {
    case WEBP_JPEG_FORMAT: return ReadJPEG;
    case WEBP_TIFF_FORMAT: return ReadTIFF;
    case WEBP_WEBP_FORMAT: return ReadWebP;
-    case WEBP_PNM_FORMAT: return ReadPNM;
    default: return FailReader;
  }
 }
--- a/imageio/image_dec.h
+++ b/imageio/image_dec.h
@@ -23,7 +23,6 @@
 #include "./metadata.h"
 #include "./jpegdec.h"
 #include "./pngdec.h"
-#include "./pnmdec.h"
 #include "./tiffdec.h"
 #include "./webpdec.h"
 #include "./wicdec.h"
@@ -37,7 +36,6 @@ typedef enum {
  WEBP_JPEG_FORMAT,
  WEBP_TIFF_FORMAT,
  WEBP_WEBP_FORMAT,
-  WEBP_PNM_FORMAT,
  WEBP_UNSUPPORTED_FORMAT
 } WebPInputFileFormat;

--- a/imageio/image_enc.c
+++ b/imageio/image_enc.c
@@ -361,8 +361,6 @@ int WebPWriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
  const uint8_t* rgba = buffer->u.RGBA.rgba;
  const int stride = buffer->u.RGBA.stride;
  const uint8_t bytes_per_px = has_alpha ? 4 : 3;
-  const uint8_t assoc_alpha =
-      WebPIsPremultipliedMode(buffer->colorspace) ? 1 : 2;
  // For non-alpha case, we omit tag 0x152 (ExtraSamples).
  const uint8_t num_ifd_entries = has_alpha ? NUM_IFD_ENTRIES
                                            : NUM_IFD_ENTRIES - 1;
@@ -390,8 +388,7 @@ int WebPWriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
        EXTRA_DATA_OFFSET + 8, 0, 0, 0,
    0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    // 154: PlanarConfiguration
    0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,    // 166: ResolutionUnit (inch)
-    0x52, 0x01, 3, 0, 1, 0, 0, 0,
-        assoc_alpha, 0, 0, 0,                    // 178: ExtraSamples: rgbA/RGBA
+    0x52, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,    // 178: ExtraSamples: rgbA
    0, 0, 0, 0,                                  // 190: IFD terminator
    // EXTRA_DATA_OFFSET:
    8, 0, 8, 0, 8, 0, 8, 0,      // BitsPerSample
@@ -542,24 +539,22 @@ int WebPWriteYUV(FILE* fout, const WebPDecBuffer* const buffer) {
 // Generic top-level call

 int WebPSaveImage(const WebPDecBuffer* const buffer,
-                  WebPOutputFileFormat format,
-                  const char* const out_file_name) {
+                  WebPOutputFileFormat format, const char* const out_file) {
  FILE* fout = NULL;
  int needs_open_file = 1;
-  const int use_stdout = (out_file_name != NULL) && !strcmp(out_file_name, "-");
+  const int use_stdout = (out_file != NULL) && !strcmp(out_file, "-");
  int ok = 1;

-  if (buffer == NULL || out_file_name == NULL) return 0;
+  if (buffer == NULL || out_file == NULL) return 0;

 #ifdef HAVE_WINCODEC_H
  needs_open_file = (format != PNG);
 #endif

  if (needs_open_file) {
-    fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout)
-                      : fopen(out_file_name, "wb");
+    fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
    if (fout == NULL) {
-      fprintf(stderr, "Error opening output file %s\n", out_file_name);
+      fprintf(stderr, "Error opening output file %s\n", out_file);
      return 0;
    }
  }
@@ -568,7 +563,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
      format == RGBA || format == BGRA || format == ARGB ||
      format == rgbA || format == bgrA || format == Argb) {
 #ifdef HAVE_WINCODEC_H
-    ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
+    ok &= WebPWritePNG(out_file, use_stdout, buffer);
 #else
    ok &= WebPWritePNG(fout, buffer);
 #endif
--- a/imageio/imageio_util.c
+++ b/imageio/imageio_util.c
@@ -112,7 +112,7 @@ int ImgIoUtilWriteFile(const char* const file_name,
  if (data == NULL) {
    return 0;
  }
-  out = to_stdout ? ImgIoUtilSetBinaryMode(stdout) : fopen(file_name, "wb");
+  out = to_stdout ? stdout : fopen(file_name, "wb");
  if (out == NULL) {
    fprintf(stderr, "Error! Cannot open output file '%s'\n", file_name);
    return 0;
@@ -137,11 +137,7 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,

 int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
  const uint64_t total_size = nmemb * size;
-  int ok = (total_size == (size_t)total_size);
-#if defined(WEBP_MAX_IMAGE_SIZE)
-  ok = ok && (total_size <= (uint64_t)WEBP_MAX_IMAGE_SIZE);
-#endif
-  return ok;
+  return (total_size == (size_t)total_size);
 }

 // -----------------------------------------------------------------------------
--- a/imageio/jpegdec.c
+++ b/imageio/jpegdec.c
@@ -304,18 +304,18 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,

  if (stride != (int)stride ||
      !ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
-    goto Error;
+    goto End;
  }

  rgb = (uint8_t*)malloc((size_t)stride * height);
  if (rgb == NULL) {
-    goto Error;
+    goto End;
  }
  buffer[0] = (JSAMPLE*)rgb;

  while (dinfo.output_scanline < dinfo.output_height) {
    if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
-      goto Error;
+      goto End;
    }
    buffer[0] += stride;
  }
--- a/imageio/pnmdec.c
+++ b/imageio/pnmdec.c
@@ -1,257 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// (limited) PNM decoder
-
-#include "./pnmdec.h"
-
-#include <assert.h>
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "webp/encode.h"
-#include "./imageio_util.h"
-
-typedef enum {
-  WIDTH_FLAG      = 1 << 0,
-  HEIGHT_FLAG     = 1 << 1,
-  DEPTH_FLAG      = 1 << 2,
-  MAXVAL_FLAG     = 1 << 3,
-  TUPLE_FLAG      = 1 << 4,
-  ALL_NEEDED_FLAGS = 0x1f
-} PNMFlags;
-
-typedef struct {
-  const uint8_t* data;
-  size_t data_size;
-  int width, height;
-  int bytes_per_px;   // 1, 3, 4
-  int depth;
-  int max_value;
-  int type;           // 5, 6 or 7
-  int seen_flags;
-} PNMInfo;
-
-// -----------------------------------------------------------------------------
-// PNM decoding
-
-#define MAX_LINE_SIZE 1024
-static const size_t kMinPNMHeaderSize = 3;
-
-static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
-                       char out[MAX_LINE_SIZE + 1], size_t* const out_size) {
-  size_t i = 0;
-  *out_size = 0;
- redo:
-  for (i = 0; i < MAX_LINE_SIZE && off < data_size; ++i) {
-    out[i] = data[off++];
-    if (out[i] == '\n') break;
-  }
-  if (off < data_size) {
-    if (i == 0) goto redo;         // empty line
-    if (out[0] == '#') goto redo;  // skip comment
-  }
-  out[i] = 0;   // safety sentinel
-  *out_size = i;
-  return off;
-}
-
-static size_t FlagError(const char flag[]) {
-  fprintf(stderr, "PAM header error: flags '%s' already seen.\n", flag);
-  return 0;
-}
-
-// inspired from http://netpbm.sourceforge.net/doc/pam.html
-static size_t ReadPAMFields(PNMInfo* const info, size_t off) {
-  char out[MAX_LINE_SIZE + 1];
-  size_t out_size;
-  int tmp;
-  assert(info != NULL);
-  while (1) {
-    off = ReadLine(info->data, off, info->data_size, out, &out_size);
-    if (off == 0) return 0;
-    if (sscanf(out, "WIDTH %d", &tmp) == 1) {
-      if (info->seen_flags & WIDTH_FLAG) return FlagError("WIDTH");
-      info->seen_flags |= WIDTH_FLAG;
-      info->width = tmp;
-    } else if (sscanf(out, "HEIGHT %d", &tmp) == 1) {
-      if (info->seen_flags & HEIGHT_FLAG) return FlagError("HEIGHT");
-      info->seen_flags |= HEIGHT_FLAG;
-      info->height = tmp;
-    } else if (sscanf(out, "DEPTH %d", &tmp) == 1) {
-      if (info->seen_flags & DEPTH_FLAG) return FlagError("DEPTH");
-      info->seen_flags |= DEPTH_FLAG;
-      info->depth = tmp;
-    } else if (sscanf(out, "MAXVAL %d", &tmp) == 1) {
-      if (info->seen_flags & MAXVAL_FLAG) return FlagError("MAXVAL");
-      info->seen_flags |= MAXVAL_FLAG;
-      info->max_value = tmp;
-    } else if (!strcmp(out, "TUPLTYPE RGB_ALPHA")) {
-      info->bytes_per_px = 4;
-      info->seen_flags |= TUPLE_FLAG;
-    } else if (!strcmp(out, "TUPLTYPE RGB")) {
-      info->bytes_per_px = 3;
-      info->seen_flags |= TUPLE_FLAG;
-    } else if (!strcmp(out, "TUPLTYPE GRAYSCALE")) {
-      info->bytes_per_px = 1;
-      info->seen_flags |= TUPLE_FLAG;
-    } else if (!strcmp(out, "ENDHDR")) {
-      break;
-    } else {
-      static const char kEllipsis[] = " ...";
-      int i;
-      if (out_size > 20) sprintf(out + 20 - strlen(kEllipsis), kEllipsis);
-      for (i = 0; i < (int)strlen(out); ++i) {
-        if (!isprint(out[i])) out[i] = ' ';
-      }
-      fprintf(stderr, "PAM header error: unrecognized entry [%s]\n", out);
-      return 0;
-    }
-  }
-  if (!(info->seen_flags & TUPLE_FLAG)) {
-    if (info->depth > 0 && info->depth <= 4) {
-      info->seen_flags |= TUPLE_FLAG;
-      info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
-    } else {
-      fprintf(stderr, "PAM: invalid bitdepth (%d).\n", info->depth);
-      return 0;
-    }
-  }
-  if (info->seen_flags != ALL_NEEDED_FLAGS) {
-    fprintf(stderr, "PAM: incomplete header.\n");
-    return 0;
-  }
-  return off;
-}
-
-static size_t ReadHeader(PNMInfo* const info) {
-  size_t off = 0;
-  char out[MAX_LINE_SIZE + 1];
-  size_t out_size;
-  if (info == NULL) return 0;
-  if (info->data == NULL || info->data_size < kMinPNMHeaderSize) return 0;
-
-  info->width = info->height = 0;
-  info->type = -1;
-  info->seen_flags = 0;
-  info->bytes_per_px = 0;
-  info->depth = 0;
-  info->max_value = 0;
-
-  off = ReadLine(info->data, off, info->data_size, out, &out_size);
-  if (off == 0 || sscanf(out, "P%d", &info->type) != 1) return 0;
-  if (info->type == 7) {
-    off = ReadPAMFields(info, off);
-  } else {
-    off = ReadLine(info->data, off, info->data_size, out, &out_size);
-    if (off == 0 || sscanf(out, "%d %d", &info->width, &info->height) != 2) {
-      return 0;
-    }
-    off = ReadLine(info->data, off, info->data_size, out, &out_size);
-    if (off == 0 || sscanf(out, "%d", &info->max_value) != 1) return 0;
-
-    // finish initializing missing fields
-    info->depth = (info->type == 5) ? 1 : 3;
-    info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
-  }
-  // perform some basic numerical validation
-  if (info->width <= 0 || info->height <= 0 ||
-      info->type <= 0 || info->type >= 9 ||
-      info->depth <= 0 || info->depth > 4 ||
-      info->bytes_per_px < info->depth ||
-      info->max_value <= 0 || info->max_value >= 65536) {
-    return 0;
-  }
-  return off;
-}
-
-int ReadPNM(const uint8_t* const data, size_t data_size,
-            WebPPicture* const pic, int keep_alpha,
-            struct Metadata* const metadata) {
-  int ok = 0;
-  int i, j;
-  uint64_t stride, pixel_bytes;
-  uint8_t* rgb = NULL, *tmp_rgb;
-  size_t offset;
-  PNMInfo info;
-
-  info.data = data;
-  info.data_size = data_size;
-  offset = ReadHeader(&info);
-  if (offset == 0) {
-    fprintf(stderr, "Error parsing PNM header.\n");
-    goto End;
-  }
-
-  if (info.type < 5 || info.type > 7) {
-    fprintf(stderr, "Unsupported P%d PNM format.\n", info.type);
-    goto End;
-  }
-
-  // Some basic validations.
-  if (pic == NULL) goto End;
-  if (info.width > WEBP_MAX_DIMENSION || info.height > WEBP_MAX_DIMENSION) {
-    fprintf(stderr, "Invalid %dx%d dimension for PNM\n",
-                    info.width, info.height);
-    goto End;
-  }
-
-  pixel_bytes = (uint64_t)info.width * info.height * info.bytes_per_px;
-  if (data_size < offset + pixel_bytes) {
-    fprintf(stderr, "Truncated PNM file (P%d).\n", info.type);
-    goto End;
-  }
-  stride =
-      (uint64_t)(info.bytes_per_px < 3 ? 3 : info.bytes_per_px) * info.width;
-  if (stride != (size_t)stride ||
-      !ImgIoUtilCheckSizeArgumentsOverflow(stride, info.height)) {
-    goto End;
-  }
-
-  rgb = (uint8_t*)malloc((size_t)stride * info.height);
-  if (rgb == NULL) goto End;
-
-  // Convert input
-  tmp_rgb = rgb;
-  for (j = 0; j < info.height; ++j) {
-    assert(offset + info.bytes_per_px * info.width <= data_size);
-    if (info.depth == 1) {
-      // convert grayscale -> RGB
-      for (i = 0; i < info.width; ++i) {
-        const uint8_t v = data[offset + i];
-        tmp_rgb[3 * i + 0] = tmp_rgb[3 * i + 1] = tmp_rgb[3 * i + 2] = v;
-      }
-    } else if (info.depth == 3) {   // RGB
-      memcpy(tmp_rgb, data + offset, 3 * info.width * sizeof(*data));
-    } else if (info.depth == 4) {   // RGBA
-      memcpy(tmp_rgb, data + offset, 4 * info.width * sizeof(*data));
-    }
-    offset += info.bytes_per_px * info.width;
-    tmp_rgb += stride;
-  }
-
-  // WebP conversion.
-  pic->width = info.width;
-  pic->height = info.height;
-  ok = (info.depth == 4) ? WebPPictureImportRGBA(pic, rgb, (int)stride)
-                         : WebPPictureImportRGB(pic, rgb, (int)stride);
-  if (!ok) goto End;
-
-  ok = 1;
- End:
-  free((void*)rgb);
-
-  (void)metadata;
-  (void)keep_alpha;
-  return ok;
-}
-
-// -----------------------------------------------------------------------------
--- a/imageio/pnmdec.h
+++ b/imageio/pnmdec.h
@@ -1,37 +0,0 @@
-// Copyright 2017 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// partial PNM format decoder (ppm/pgm)
-
-#ifndef WEBP_IMAGEIO_PNMDEC_H_
-#define WEBP_IMAGEIO_PNMDEC_H_
-
-#include "webp/types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct Metadata;
-struct WebPPicture;
-
-// Reads a PNM file from 'data', returning the decoded output in 'pic'.
-// The output is RGB or YUV depending on pic->use_argb value.
-// Returns true on success.
-// 'metadata' has no effect, but is kept for coherence with other signatures
-// for image readers.
-int ReadPNM(const uint8_t* const data, size_t data_size,
-            struct WebPPicture* const pic, int keep_alpha,
-            struct Metadata* const metadata);
-
-#ifdef __cplusplus
-}    // extern "C"
-#endif
-
-#endif  // WEBP_IMAGEIO_PNMDEC_H_
--- a/imageio/tiffdec.c
+++ b/imageio/tiffdec.c
@@ -15,7 +15,6 @@
 #include "webp/config.h"
 #endif

-#include <limits.h>
 #include <stdio.h>
 #include <string.h>

@@ -108,7 +107,7 @@ static void MyUnmapFile(thandle_t opaque, void* base, toff_t size) {
 static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {
  MyData* const my_data = (MyData*)opaque;
  if (my_data->pos + size > my_data->size) {
-    size = (tsize_t)(my_data->size - my_data->pos);
+    size = my_data->size - my_data->pos;
  }
  if (size > 0) {
    memcpy(dst, my_data->data + my_data->pos, size);
@@ -117,55 +116,18 @@ static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {
  return size;
 }

-// Unmultiply Argb data. Taken from dsp/alpha_processing
-// (we don't want to force a dependency to a libdspdec library).
-#define MFIX 24    // 24bit fixed-point arithmetic
-#define HALF ((1u << MFIX) >> 1)
-#define KINV_255 ((1u << MFIX) / 255u)
-
-static uint32_t Unmult(uint8_t x, uint32_t mult) {
-  const uint32_t v = (x * mult + HALF) >> MFIX;
-  return (v > 255u) ? 255u : v;
-}
-
-static WEBP_INLINE uint32_t GetScale(uint32_t a) {
-  return (255u << MFIX) / a;
-}
-
-static void MultARGBRow(uint8_t* ptr, int width) {
-  int x;
-  for (x = 0; x < width; ++x, ptr += 4) {
-    const uint32_t alpha = ptr[3];
-    if (alpha < 255) {
-      if (alpha == 0) {   // alpha == 0
-        ptr[0] = ptr[1] = ptr[2] = 0;
-      } else {
-        const uint32_t scale = GetScale(alpha);
-        ptr[0] = Unmult(ptr[0], scale);
-        ptr[1] = Unmult(ptr[1], scale);
-        ptr[2] = Unmult(ptr[2], scale);
-      }
-    }
-  }
-}
-
 int ReadTIFF(const uint8_t* const data, size_t data_size,
             WebPPicture* const pic, int keep_alpha,
             Metadata* const metadata) {
  MyData my_data = { data, (toff_t)data_size, 0 };
  TIFF* tif;
-  uint32_t width, height;
-  uint16_t samples_per_px = 0;
-  uint16_t extra_samples = 0;
-  uint16_t* extra_samples_ptr = NULL;
-  uint32_t* raster;
+  uint32 width, height;
+  uint32* raster;
  int64_t alloc_size;
  int ok = 0;
  tdir_t dircount;

-  if (data == NULL || data_size == 0 || data_size > INT_MAX || pic == NULL) {
-    return 0;
-  }
+  if (data == NULL || data_size == 0 || pic == NULL) return 0;

  tif = TIFFClientOpen("Memory", "r", &my_data,
                       MyRead, MyRead, MySeek, MyClose,
@@ -181,27 +143,17 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
                    "Only the first will be used, %d will be ignored.\n",
                    dircount - 1);
  }
-  if (!TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &samples_per_px)) {
-    fprintf(stderr, "Error! Cannot retrieve TIFF samples-per-pixel info.\n");
-    goto End;
-  }
-  if (samples_per_px < 3 || samples_per_px > 4) goto End;  // not supported

  if (!(TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width) &&
        TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height))) {
    fprintf(stderr, "Error! Cannot retrieve TIFF image dimensions.\n");
    goto End;
  }
+
  if (!ImgIoUtilCheckSizeArgumentsOverflow((uint64_t)width * height,
                                           sizeof(*raster))) {
    goto End;
  }
-  if (samples_per_px > 3 && !TIFFGetField(tif, TIFFTAG_EXTRASAMPLES,
-                                          &extra_samples, &extra_samples_ptr)) {
-    fprintf(stderr, "Error! Cannot retrieve TIFF ExtraSamples info.\n");
-    goto End;
-  }
-
  // _Tiffmalloc uses a signed type for size.
  alloc_size = (int64_t)((uint64_t)width * height * sizeof(*raster));
  if (alloc_size < 0 || alloc_size != (tsize_t)alloc_size) goto End;
@@ -217,16 +169,6 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
 #ifdef WORDS_BIGENDIAN
      TIFFSwabArrayOfLong(raster, width * height);
 #endif
-      // if we have an alpha channel, we must un-multiply from rgbA to RGBA
-      if (extra_samples == 1 && extra_samples_ptr != NULL &&
-          extra_samples_ptr[0] == EXTRASAMPLE_ASSOCALPHA) {
-        uint32_t y;
-        uint8_t* tmp = (uint8_t*)raster;
-        for (y = 0; y < height; ++y) {
-          MultARGBRow(tmp, width);
-          tmp += stride;
-        }
-      }
      ok = keep_alpha
         ? WebPPictureImportRGBA(pic, (const uint8_t*)raster, stride)
         : WebPPictureImportRGBX(pic, (const uint8_t*)raster, stride);
--- a/imageio/webpdec.c
+++ b/imageio/webpdec.c
@@ -9,10 +9,6 @@
 //
 // WebP decode.

-#ifdef HAVE_CONFIG_H
-#include "webp/config.h"
-#endif
-
 #include "./webpdec.h"

 #include <stdio.h>
@@ -142,68 +138,46 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
    PrintWebPError("input data", status);
    return 0;
  }
-
-  do {
+  {
    const int has_alpha = keep_alpha && bitstream->has_alpha;
-    uint64_t stride;
-    pic->width = bitstream->width;
-    pic->height = bitstream->height;
    if (pic->use_argb) {
-      stride = (uint64_t)bitstream->width * 4;
-    } else {
-      stride = (uint64_t)bitstream->width * (has_alpha ? 5 : 3) / 2;
-      pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
-    }
-
-    if (!ImgIoUtilCheckSizeArgumentsOverflow(stride, bitstream->height)) {
-      status = VP8_STATUS_OUT_OF_MEMORY;
-      break;
-    }
-
-    ok = WebPPictureAlloc(pic);
-    if (!ok) {
-      status = VP8_STATUS_OUT_OF_MEMORY;
-      break;
-    }
-    if (pic->use_argb) {
-#ifdef WORDS_BIGENDIAN
-      output_buffer->colorspace = MODE_ARGB;
-#else
-      output_buffer->colorspace = MODE_BGRA;
-#endif
-      output_buffer->u.RGBA.rgba = (uint8_t*)pic->argb;
-      output_buffer->u.RGBA.stride = pic->argb_stride * sizeof(uint32_t);
-      output_buffer->u.RGBA.size = output_buffer->u.RGBA.stride * pic->height;
+      output_buffer->colorspace = has_alpha ? MODE_RGBA : MODE_RGB;
    } else {
      output_buffer->colorspace = has_alpha ? MODE_YUVA : MODE_YUV;
-      output_buffer->u.YUVA.y = pic->y;
-      output_buffer->u.YUVA.u = pic->u;
-      output_buffer->u.YUVA.v = pic->v;
-      output_buffer->u.YUVA.a = has_alpha ? pic->a : NULL;
-      output_buffer->u.YUVA.y_stride = pic->y_stride;
-      output_buffer->u.YUVA.u_stride = pic->uv_stride;
-      output_buffer->u.YUVA.v_stride = pic->uv_stride;
-      output_buffer->u.YUVA.a_stride = has_alpha ? pic->a_stride : 0;
-      output_buffer->u.YUVA.y_size = pic->height * pic->y_stride;
-      output_buffer->u.YUVA.u_size = (pic->height + 1) / 2 * pic->uv_stride;
-      output_buffer->u.YUVA.v_size = (pic->height + 1) / 2 * pic->uv_stride;
-      output_buffer->u.YUVA.a_size = pic->height * pic->a_stride;
    }
-    output_buffer->is_external_memory = 1;

    status = DecodeWebP(data, data_size, &config);
-    ok = (status == VP8_STATUS_OK);
-    if (!ok) WebPPictureFree(pic);
-    if (ok && !keep_alpha && pic->use_argb) {
-      // Need to wipe out the alpha value, as requested.
-      int x, y;
-      uint32_t* argb = pic->argb;
-      for (y = 0; y < pic->height; ++y) {
-        for (x = 0; x < pic->width; ++x) argb[x] |= 0xff000000u;
-        argb += pic->argb_stride;
+    if (status == VP8_STATUS_OK) {
+      pic->width = output_buffer->width;
+      pic->height = output_buffer->height;
+      if (pic->use_argb) {
+        const uint8_t* const rgba = output_buffer->u.RGBA.rgba;
+        const int stride = output_buffer->u.RGBA.stride;
+        ok = has_alpha ? WebPPictureImportRGBA(pic, rgba, stride)
+                       : WebPPictureImportRGB(pic, rgba, stride);
+      } else {
+        pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
+        ok = WebPPictureAlloc(pic);
+        if (!ok) {
+          status = VP8_STATUS_OUT_OF_MEMORY;
+        } else {
+          const WebPYUVABuffer* const yuva = &output_buffer->u.YUVA;
+          const int uv_width = (pic->width + 1) >> 1;
+          const int uv_height = (pic->height + 1) >> 1;
+          ImgIoUtilCopyPlane(yuva->y, yuva->y_stride,
+                             pic->y, pic->y_stride, pic->width, pic->height);
+          ImgIoUtilCopyPlane(yuva->u, yuva->u_stride,
+                             pic->u, pic->uv_stride, uv_width, uv_height);
+          ImgIoUtilCopyPlane(yuva->v, yuva->v_stride,
+                             pic->v, pic->uv_stride, uv_width, uv_height);
+          if (has_alpha) {
+            ImgIoUtilCopyPlane(yuva->a, yuva->a_stride,
+                               pic->a, pic->a_stride, pic->width, pic->height);
+          }
+        }
      }
    }
-  } while (0);   // <- so we can 'break' out of the loop
+  }

  if (status != VP8_STATUS_OK) {
    PrintWebPError("input data", status);
--- a/imageio/webpdec.h
+++ b/imageio/webpdec.h
@@ -51,7 +51,7 @@ VP8StatusCode DecodeWebPIncremental(

 //------------------------------------------------------------------------------

-// Decodes a WebP contained in 'data', returning the decoded output in 'pic'.
+// Reads a WebP from 'in_file', returning the decoded output in 'pic'.
 // Output is RGBA or YUVA, depending on pic->use_argb value.
 // If 'keep_alpha' is true and the WebP has an alpha channel, the output is RGBA
 // or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
--- a/makefile.unix
+++ b/makefile.unix
@@ -29,21 +29,9 @@ ifeq ($(strip $(shell uname)), Darwin)
  EXTRA_LIBS  += -L/opt/local/lib
  GL_LIBS = -framework GLUT -framework OpenGL
 else
-  EXTRA_FLAGS += -I/usr/local/include
-  EXTRA_LIBS  += -L/usr/local/lib
  GL_LIBS = -lglut -lGL
 endif

-# SDL flags: use sdl-config if it exists
-SDL_CONFIG = $(shell sdl-config --version 2> /dev/null)
-ifneq ($(SDL_CONFIG),)
-  SDL_LIBS = $(shell sdl-config --libs)
-  SDL_FLAGS = $(shell sdl-config --cflags)
-else
-  # use best-guess
-  SDL_LIBS = -lSDL
-  SDL_FLAGS =
-endif

 # To install libraries on Mac OS X:
 # 1. Install MacPorts (http://www.macports.org/install.php)
@@ -67,7 +55,7 @@ endif
 # EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES

 # Extra flags to enable byte swap for 16 bit colorspaces.
-# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1
+# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP

 # Extra flags to enable multi-threading
 EXTRA_FLAGS += -DWEBP_USE_THREAD
@@ -113,7 +101,7 @@ endif

 AR = ar
 ARFLAGS = r
-CPPFLAGS = -I. -Isrc/ -Wall
+CPPFLAGS = -Isrc/ -Wall
 CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
 CC = gcc
 INSTALL = install
@@ -179,10 +167,12 @@ DSP_DEC_OBJS = \
    src/dsp/yuv.o \
    src/dsp/yuv_mips32.o \
    src/dsp/yuv_mips_dsp_r2.o \
-    src/dsp/yuv_neon.o \
    src/dsp/yuv_sse2.o \

 DSP_ENC_OBJS = \
+    src/dsp/argb.o \
+    src/dsp/argb_mips_dsp_r2.o \
+    src/dsp/argb_sse2.o \
    src/dsp/cost.o \
    src/dsp/cost_mips32.o \
    src/dsp/cost_mips_dsp_r2.o \
@@ -202,13 +192,10 @@ DSP_ENC_OBJS = \
    src/dsp/lossless_enc_neon.o \
    src/dsp/lossless_enc_sse2.o \
    src/dsp/lossless_enc_sse41.o \
-    src/dsp/ssim.o \
-    src/dsp/ssim_sse2.o \

 ENC_OBJS = \
    src/enc/alpha_enc.o \
    src/enc/analysis_enc.o \
-    src/enc/backward_references_cost_enc.o \
    src/enc/backward_references_enc.o \
    src/enc/config_enc.o \
    src/enc/cost_enc.o \
@@ -236,7 +223,6 @@ EX_FORMAT_DEC_OBJS = \
    imageio/jpegdec.o \
    imageio/metadata.o \
    imageio/pngdec.o \
-    imageio/pnmdec.o \
    imageio/tiffdec.o \
    imageio/webpdec.o \

@@ -342,9 +328,8 @@ OUT_LIBS += src/libwebp.a
 EXTRA_LIB = extras/libwebpextras.a
 OUT_EXAMPLES = examples/cwebp examples/dwebp
 EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux \
-                 examples/anim_diff examples/anim_dump \
-                 examples/img2webp examples/webpinfo
-OTHER_EXAMPLES = extras/get_disto extras/webp_quality extras/vwebp_sdl
+                 examples/anim_diff examples/img2webp
+OTHER_EXAMPLES = extras/get_disto extras/webp_quality

 OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
 ifeq ($(MAKECMDGOALS),clean)
@@ -371,7 +356,7 @@ src/utils/bit_reader_utils.o: src/utils/endian_inl_utils.h
 src/utils/bit_writer_utils.o: src/utils/endian_inl_utils.h

 %.o: %.c $(HDRS)
-	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

 examples/libanim_util.a: $(ANIM_UTIL_OBJS)
 examples/libexample_util.a: $(EX_UTIL_OBJS)
@@ -389,27 +374,18 @@ src/demux/libwebpdemux.a: $(LIBWEBPDEMUX_OBJS)
 	$(AR) $(ARFLAGS) $@ $^

 examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
-examples/anim_dump: examples/anim_dump.o $(ANIM_UTIL_OBJS)
 examples/cwebp: examples/cwebp.o
 examples/dwebp: examples/dwebp.o
 examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
 examples/vwebp: examples/vwebp.o
 examples/webpmux: examples/webpmux.o
 examples/img2webp: examples/img2webp.o
-examples/webpinfo: examples/webpinfo.o

 examples/anim_diff: examples/libanim_util.a examples/libgifdec.a
 examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
 examples/anim_diff: imageio/libimageio_util.a src/libwebp.a
 examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
 examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
-examples/anim_dump: examples/libanim_util.a
-examples/anim_dump: src/demux/libwebpdemux.a
-examples/anim_dump: examples/libexample_util.a
-examples/anim_dump: imageio/libimageio_util.a
-examples/anim_dump: imageio/libimageenc.a
-examples/anim_dump: src/libwebp.a
-examples/anim_dump: EXTRA_LIBS += $(GIF_LIBS) $(DWEBP_LIBS)
 examples/cwebp: examples/libexample_util.a
 examples/cwebp: imageio/libimagedec.a
 examples/cwebp: imageio/libimageio_util.a
@@ -435,8 +411,6 @@ examples/img2webp: examples/libexample_util.a imageio/libimageio_util.a
 examples/img2webp: imageio/libimagedec.a
 examples/img2webp: src/mux/libwebpmux.a src/libwebp.a
 examples/img2webp: EXTRA_LIBS += $(CWEBP_LIBS)
-examples/webpinfo: examples/libexample_util.a imageio/libimageio_util.a
-examples/webpinfo: src/libwebpdecoder.a

 extras/get_disto: extras/get_disto.o
 extras/get_disto: imageio/libimagedec.a imageio/libimageio_util.a src/libwebp.a
@@ -446,13 +420,6 @@ extras/webp_quality: extras/webp_quality.o
 extras/webp_quality: imageio/libimageio_util.a
 extras/webp_quality: $(EXTRA_LIB) src/libwebp.a

-extras/vwebp_sdl: extras/vwebp_sdl.o
-extras/vwebp_sdl: extras/webp_to_sdl.o
-extras/vwebp_sdl: imageio/libimageio_util.a
-extras/vwebp_sdl: src/libwebp.a
-extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL $(SDL_FLAGS)
-extras/vwebp_sdl: EXTRA_LIBS += $(SDL_LIBS)
-
 $(OUT_EXAMPLES) $(EXTRA_EXAMPLES) $(OTHER_EXAMPLES):
 	$(CC) -o $@ $^ $(LDFLAGS)

@@ -468,7 +435,7 @@ dist: all
 	$(INSTALL) -m644 src/mux/libwebpmux.a $(DESTDIR)/lib
 	umask 022; \
 	for m in man/[cdv]webp.1 man/gif2webp.1 man/webpmux.1 \
-                 man/img2webp.1 man/webpinfo.1; do \
+                 man/img2webp.1; do \
 	  basenam=$$(basename $$m .1); \
 	  $(GROFF) -t -e -man -T utf8 $$m \
 	    | $(COL) -bx >$(DESTDIR)/doc/$${basenam}.txt; \
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -8,7 +8,4 @@ endif
 if BUILD_VWEBP
  man_MANS += vwebp.1
 endif
-if BUILD_WEBPINFO
-  man_MANS += webpinfo.1
-endif
 EXTRA_DIST = $(man_MANS)
--- a/man/cwebp.1
+++ b/man/cwebp.1
@@ -98,7 +98,8 @@ Crop the source to a rectangle with top\-left corner at coordinates
 This cropping area must be fully contained within the source rectangle.
 .TP
 .B \-mt
-Use multi\-threading for encoding, if possible.
+Use multi\-threading for encoding, if possible. This option is only effective
+when using lossy compression on a source with a transparency channel.
 .TP
 .B \-low_memory
 Reduce memory usage of lossy encoding by saving four times the compressed
--- a/man/gif2webp.1
+++ b/man/gif2webp.1
@@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH GIF2WEBP 1 "September 20, 2017"
+.TH GIF2WEBP 1 "January 25, 2017"
 .SH NAME
 gif2webp \- Convert a GIF image to WebP
 .SH SYNOPSIS
@@ -108,10 +108,8 @@ the value the smoother the picture will appear. Typical values are usually in
 the range of 20 to 50.
 .TP
 .B \-mt
-Use multi-threading for encoding, if possible.
-.B \-loop_compatibility
-If enabled, handle the loop information in a compatible fashion for Chrome
-version prior to M62 (inclusive) and Firefox.
+Use multi-threading for encoding, if possible. This option is only effective
+when using lossy compression.
 .TP
 .B \-v
 Print extra information.
--- a/man/webpinfo.1
+++ b/man/webpinfo.1
@@ -1,80 +0,0 @@
-.\"                                      Hey, EMACS: -*- nroff -*-
-.TH WEBPINFO 1 "November 24, 2017"
-.SH NAME
-webpinfo \- print out the chunk level structure of WebP files
-along with basic integrity checks.
-.SH SYNOPSIS
-.B webpinfo
-.I OPTIONS
-.I INPUT
-.br
-.B webpinfo [\-h|\-help|\-H|\-longhelp]
-.br
-
-.SH DESCRIPTION
-This manual page documents the
-.B webpinfo
-command.
-.PP
-\fBwebpinfo\fP can be used to print out the chunk level structure and bitstream
-header information of WebP files. It can also check if the files are of valid
-WebP format.
-
-.SH OPTIONS
-.TP
-.B \-version
-Print the version number (as major.minor.revision) and exit.
-.TP
-.B \-quiet
-Do not show chunk parsing information.
-.TP
-.B \-diag
-Show parsing error diagnosis.
-.TP
-.B \-summary
-Show chunk stats summary.
-.TP
-.BI \-bitstream_info
-Parse bitstream header.
-.TP
-.B \-h, \-help
-A short usage summary.
-.TP
-.B \-H, \-longhelp
-Detailed usage instructions.
-
-.SH INPUT
-Input files in WebP format. Input files must come last, following
-options (if any). There can be multiple input files.
-
-.SH BUGS
-Please report all bugs to the issue tracker:
-https://bugs.chromium.org/p/webp
-.br
-Patches welcome! See this page to get started:
-http://www.webmproject.org/code/contribute/submitting\-patches/
-
-.SH EXAMPLES
-.br
-webpinfo \-h
-.br
-webpinfo \-diag \-summary input_file.webp
-.br
-webpinfo \-bitstream_info input_file_1.webp input_file_2.webp
-.br
-webpinfo *.webp
-
-.SH AUTHORS
-\fBwebpinfo\fP is a part of libwebp and was written by the WebP team.
-.br
-The latest source tree is available at
-https://chromium.googlesource.com/webm/libwebp
-.PP
-This manual page was written by Hui Su <huisu@google.com>,
-for the Debian project (and may be used by others).
-
-.SH SEE ALSO
-.BR webpmux (1)
-.br
-Please refer to http://developers.google.com/speed/webp/ for additional
-information.
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -22,7 +22,6 @@ commondir = $(includedir)/webp
 libwebp_la_SOURCES =
 libwebpinclude_HEADERS =
 libwebpinclude_HEADERS += webp/encode.h
-
 noinst_HEADERS =
 noinst_HEADERS += webp/format_constants.h

@@ -36,7 +35,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
 # other than the ones listed on the command line, i.e., after linking, it will
 # not have unresolved symbols. Some platforms (Windows among them) require all
 # symbols in shared libraries to be resolved at library creation.
-libwebp_la_LDFLAGS = -no-undefined -version-info 7:1:0
+libwebp_la_LDFLAGS = -no-undefined -version-info 7:0:0
 libwebpincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebp.pc

@@ -48,7 +47,7 @@ if BUILD_LIBWEBPDECODER
  libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
  libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la

-  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:1:0
+  libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:0:0
  pkgconfig_DATA += libwebpdecoder.pc
 endif

--- a/src/dec/Makefile.am
+++ b/src/dec/Makefile.am
@@ -1,4 +1,3 @@
-AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
 noinst_LTLIBRARIES = libwebpdecode.la

 libwebpdecode_la_SOURCES =
--- a/src/dec/alpha_dec.c
+++ b/src/dec/alpha_dec.c
@@ -12,13 +12,13 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <stdlib.h>
-#include "src/dec/alphai_dec.h"
-#include "src/dec/vp8i_dec.h"
-#include "src/dec/vp8li_dec.h"
-#include "src/dsp/dsp.h"
-#include "src/utils/quant_levels_dec_utils.h"
-#include "src/utils/utils.h"
-#include "src/webp/format_constants.h"
+#include "./alphai_dec.h"
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "../dsp/dsp.h"
+#include "../utils/quant_levels_dec_utils.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"

 //------------------------------------------------------------------------------
 // ALPHDecoder object.
--- a/src/dec/alphai_dec.h
+++ b/src/dec/alphai_dec.h
@@ -11,11 +11,11 @@
 //
 // Author: Urvang (urvang@google.com)

-#ifndef WEBP_DEC_ALPHAI_DEC_H_
-#define WEBP_DEC_ALPHAI_DEC_H_
+#ifndef WEBP_DEC_ALPHAI_H_
+#define WEBP_DEC_ALPHAI_H_

-#include "src/dec/webpi_dec.h"
-#include "src/utils/filters_utils.h"
+#include "./webpi_dec.h"
+#include "../utils/filters_utils.h"

 #ifdef __cplusplus
 extern "C" {
@@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
 }    // extern "C"
 #endif

-#endif  /* WEBP_DEC_ALPHAI_DEC_H_ */
+#endif  /* WEBP_DEC_ALPHAI_H_ */
--- a/src/dec/buffer_dec.c
+++ b/src/dec/buffer_dec.c
@@ -13,15 +13,15 @@

 #include <stdlib.h>

-#include "src/dec/vp8i_dec.h"
-#include "src/dec/webpi_dec.h"
-#include "src/utils/utils.h"
+#include "./vp8i_dec.h"
+#include "./webpi_dec.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------
 // WebPDecBuffer

 // Number of bytes per pixel for the different color-spaces.
-static const uint8_t kModeBpp[MODE_LAST] = {
+static const int kModeBpp[MODE_LAST] = {
  3, 4, 3, 4, 4, 2, 2,
  4, 4, 4, 2,    // pre-multiplied modes
  1, 1 };
@@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
 // strictly speaking, the very last (or first, if flipped) row
 // doesn't require padding.
 #define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE)       \
-    ((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
+    (uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)

 static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
  int ok = 1;
@@ -74,8 +74,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
  } else {    // RGB checks
    const WebPRGBABuffer* const buf = &buffer->u.RGBA;
    const int stride = abs(buf->stride);
-    const uint64_t size =
-        MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
+    const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
    ok &= (size <= buf->size);
    ok &= (stride >= width * kModeBpp[mode]);
    ok &= (buf->rgba != NULL);
@@ -99,14 +98,9 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
    uint64_t uv_size = 0, a_size = 0, total_size;
    // We need memory and it hasn't been allocated yet.
    // => initialize output buffer, now that dimensions are known.
-    int stride;
-    uint64_t size;
+    const int stride = w * kModeBpp[mode];
+    const uint64_t size = (uint64_t)stride * h;

-    if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
-      return VP8_STATUS_INVALID_PARAM;
-    }
-    stride = w * kModeBpp[mode];
-    size = (uint64_t)stride * h;
    if (!WebPIsRGBMode(mode)) {
      uv_stride = (w + 1) / 2;
      uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
@@ -175,11 +169,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
  return VP8_STATUS_OK;
 }

-VP8StatusCode WebPAllocateDecBuffer(int width, int height,
+VP8StatusCode WebPAllocateDecBuffer(int w, int h,
                                    const WebPDecoderOptions* const options,
-                                    WebPDecBuffer* const buffer) {
+                                    WebPDecBuffer* const out) {
  VP8StatusCode status;
-  if (buffer == NULL || width <= 0 || height <= 0) {
+  if (out == NULL || w <= 0 || h <= 0) {
    return VP8_STATUS_INVALID_PARAM;
  }
  if (options != NULL) {    // First, apply options if there is any.
@@ -188,39 +182,33 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
      const int ch = options->crop_height;
      const int x = options->crop_left & ~1;
      const int y = options->crop_top & ~1;
-      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
-          x + cw > width || y + ch > height) {
+      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
        return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
      }
-      width = cw;
-      height = ch;
+      w = cw;
+      h = ch;
    }
-
    if (options->use_scaling) {
-#if !defined(WEBP_REDUCE_SIZE)
      int scaled_width = options->scaled_width;
      int scaled_height = options->scaled_height;
      if (!WebPRescalerGetScaledDimensions(
-              width, height, &scaled_width, &scaled_height)) {
+              w, h, &scaled_width, &scaled_height)) {
        return VP8_STATUS_INVALID_PARAM;
      }
-      width = scaled_width;
-      height = scaled_height;
-#else
-      return VP8_STATUS_INVALID_PARAM;   // rescaling not supported
-#endif
+      w = scaled_width;
+      h = scaled_height;
    }
  }
-  buffer->width = width;
-  buffer->height = height;
+  out->width = w;
+  out->height = h;

  // Then, allocate buffer for real.
-  status = AllocateBuffer(buffer);
+  status = AllocateBuffer(out);
  if (status != VP8_STATUS_OK) return status;

  // Use the stride trick if vertical flip is needed.
  if (options != NULL && options->flip) {
-    status = WebPFlipBuffer(buffer);
+    status = WebPFlipBuffer(out);
  }
  return status;
 }
--- a/src/dec/common_dec.h
+++ b/src/dec/common_dec.h
@@ -11,8 +11,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#ifndef WEBP_DEC_COMMON_DEC_H_
-#define WEBP_DEC_COMMON_DEC_H_
+#ifndef WEBP_DEC_COMMON_H_
+#define WEBP_DEC_COMMON_H_

 // intra prediction modes
 enum { B_DC_PRED = 0,   // 4x4 modes
@@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
       NUM_PROBAS = 11
     };

-#endif    // WEBP_DEC_COMMON_DEC_H_
+#endif    // WEBP_DEC_COMMON_H_
--- a/src/dec/frame_dec.c
+++ b/src/dec/frame_dec.c
@@ -12,13 +12,13 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <stdlib.h>
-#include "src/dec/vp8i_dec.h"
-#include "src/utils/utils.h"
+#include "./vp8i_dec.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------
 // Main reconstruction function.

-static const uint16_t kScan[16] = {
+static const int kScan[16] = {
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
@@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
 #define MIN_DITHER_AMP 4

 #define DITHER_AMP_TAB_SIZE 12
-static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
+static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
  // roughly, it's dqm->uv_mat_[1]
  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
 };
@@ -728,7 +728,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
  }

  mem = (uint8_t*)dec->mem_;
-  dec->intra_t_ = mem;
+  dec->intra_t_ = (uint8_t*)mem;
  mem += intra_pred_mode_size;

  dec->yuv_t_ = (VP8TopSamples*)mem;
@@ -750,7 +750,7 @@ static int AllocateMemory(VP8Decoder* const dec) {

  mem = (uint8_t*)WEBP_ALIGN(mem);
  assert((yuv_size & WEBP_ALIGN_CST) == 0);
-  dec->yuv_b_ = mem;
+  dec->yuv_b_ = (uint8_t*)mem;
  mem += yuv_size;

  dec->mb_data_ = (VP8MBData*)mem;
@@ -766,7 +766,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
    const int extra_rows = kFilterExtraRows[dec->filter_type_];
    const int extra_y = extra_rows * dec->cache_y_stride_;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
-    dec->cache_y_ = mem + extra_y;
+    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
    dec->cache_u_ = dec->cache_y_
                  + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
    dec->cache_v_ = dec->cache_u_
@@ -776,7 +776,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
  mem += cache_size;

  // alpha plane
-  dec->alpha_plane_ = alpha_size ? mem : NULL;
+  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
  mem += alpha_size;
  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);

--- a/src/dec/idec_dec.c
+++ b/src/dec/idec_dec.c
@@ -15,10 +15,10 @@
 #include <string.h>
 #include <stdlib.h>

-#include "src/dec/alphai_dec.h"
-#include "src/dec/webpi_dec.h"
-#include "src/dec/vp8i_dec.h"
-#include "src/utils/utils.h"
+#include "./alphai_dec.h"
+#include "./webpi_dec.h"
+#include "./vp8i_dec.h"
+#include "../utils/utils.h"

 // In append mode, buffer allocations increase as multiples of this value.
 // Needs to be a power of 2.
@@ -283,8 +283,10 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,

 static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
  if (idec->state_ == STATE_VP8_DATA) {
-    // Synchronize the thread, clean-up and check for errors.
-    VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+    VP8Io* const io = &idec->io_;
+    if (io->teardown != NULL) {
+      io->teardown(io);
+    }
  }
  idec->state_ = STATE_ERROR;
  return error;
@@ -449,10 +451,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
  VP8Io* const io = &idec->io_;

-  // Make sure partition #0 has been read before, to set dec to ready_.
-  if (!dec->ready_) {
-    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
-  }
+  assert(dec->ready_);
  for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
    if (idec->last_mb_y_ != dec->mb_y_) {
      if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
@@ -492,7 +491,6 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
  }
  // Synchronize the thread and check for errors.
  if (!VP8ExitCritical(dec, io)) {
-    idec->state_ = STATE_ERROR;  // prevent re-entry in IDecError
    return IDecError(idec, VP8_STATUS_USER_ABORT);
  }
  dec->ready_ = 0;
@@ -573,10 +571,6 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
    status = DecodePartition0(idec);
  }
  if (idec->state_ == STATE_VP8_DATA) {
-    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-    if (dec == NULL) {
-      return VP8_STATUS_SUSPENDED;  // can't continue if we have no decoder.
-    }
    status = DecodeRemaining(idec);
  }
  if (idec->state_ == STATE_VP8L_HEADER) {
@@ -679,12 +673,12 @@ void WebPIDelete(WebPIDecoder* idec) {
 //------------------------------------------------------------------------------
 // Wrapper toward WebPINewDecoder

-WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
+WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
                          size_t output_buffer_size, int output_stride) {
  const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
  WebPIDecoder* idec;

-  if (csp >= MODE_YUV) return NULL;
+  if (mode >= MODE_YUV) return NULL;
  if (is_external_memory == 0) {    // Overwrite parameters to sane values.
    output_buffer_size = 0;
    output_stride = 0;
@@ -695,7 +689,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
  }
  idec = WebPINewDecoder(NULL);
  if (idec == NULL) return NULL;
-  idec->output_.colorspace = csp;
+  idec->output_.colorspace = mode;
  idec->output_.is_external_memory = is_external_memory;
  idec->output_.u.RGBA.rgba = output_buffer;
  idec->output_.u.RGBA.stride = output_stride;
--- a/src/dec/io_dec.c
+++ b/src/dec/io_dec.c
@@ -13,11 +13,11 @@

 #include <assert.h>
 #include <stdlib.h>
-#include "src/dec/vp8i_dec.h"
-#include "src/dec/webpi_dec.h"
-#include "src/dsp/dsp.h"
-#include "src/dsp/yuv.h"
-#include "src/utils/utils.h"
+#include "../dec/vp8i_dec.h"
+#include "./webpi_dec.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
    int num_rows;
    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-#if (WEBP_SWAP_16BIT_CSP == 1)
+#ifdef WEBP_SWAP_16BIT_CSP
    uint8_t* alpha_dst = base_rgba;
 #else
    uint8_t* alpha_dst = base_rgba + 1;
@@ -241,7 +241,6 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
 //------------------------------------------------------------------------------
 // YUV rescaling (no final RGB conversion needed)

-#if !defined(WEBP_REDUCE_SIZE)
 static int Rescale(const uint8_t* src, int src_stride,
                   int new_lines, WebPRescaler* const wrk) {
  int num_lines_out = 0;
@@ -432,7 +431,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
                               int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
  uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
-#if (WEBP_SWAP_16BIT_CSP == 1)
+#ifdef WEBP_SWAP_16BIT_CSP
  uint8_t* alpha_dst = base_rgba;
 #else
  uint8_t* alpha_dst = base_rgba + 1;
@@ -542,8 +541,6 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
  return 1;
 }

-#endif  // WEBP_REDUCE_SIZE
-
 //------------------------------------------------------------------------------
 // Default custom functions

@@ -564,14 +561,10 @@ static int CustomSetup(VP8Io* io) {
    WebPInitUpsamplers();
  }
  if (io->use_scaling) {
-#if !defined(WEBP_REDUCE_SIZE)
    const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
    if (!ok) {
      return 0;    // memory error
    }
-#else
-    return 0;   // rescaling support not compiled
-#endif
  } else {
    if (is_rgb) {
      WebPInitSamplers();
@@ -605,6 +598,9 @@ static int CustomSetup(VP8Io* io) {
    }
  }

+  if (is_rgb) {
+    VP8YUVInit();
+  }
  return 1;
 }

--- a/src/dec/quant_dec.c
+++ b/src/dec/quant_dec.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dec/vp8i_dec.h"
+#include "./vp8i_dec.h"

 static WEBP_INLINE int clip(int v, int M) {
  return v < 0 ? 0 : v > M ? M : v;
--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@@ -11,19 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dec/vp8i_dec.h"
-#include "src/utils/bit_reader_inl_utils.h"
+#include "./vp8i_dec.h"
+#include "../utils/bit_reader_inl_utils.h"

-#if !defined(USE_GENERIC_TREE)
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
-#define USE_GENERIC_TREE 1   // ALTERNATE_CODE
-#else
-#define USE_GENERIC_TREE 0
+#define USE_GENERIC_TREE
 #endif
-#endif  // USE_GENERIC_TREE

-#if (USE_GENERIC_TREE == 1)
+#ifdef USE_GENERIC_TREE
 static const int8_t kYModesIntra4[18] = {
  -B_DC_PRED, 1,
    -B_TM_PRED, 2,
@@ -321,7 +317,7 @@ static void ParseIntraMode(VP8BitReader* const br,
      int x;
      for (x = 0; x < 4; ++x) {
        const uint8_t* const prob = kBModesProba[top[x]][ymode];
-#if (USE_GENERIC_TREE == 1)
+#ifdef USE_GENERIC_TREE
        // Generic tree-parsing
        int i = kYModesIntra4[VP8GetBit(br, prob[0])];
        while (i > 0) {
@@ -339,7 +335,7 @@ static void ParseIntraMode(VP8BitReader* const br,
                        (!VP8GetBit(br, prob[6]) ? B_LD_PRED :
                          (!VP8GetBit(br, prob[7]) ? B_VL_PRED :
                            (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
-#endif  // USE_GENERIC_TREE
+#endif    // USE_GENERIC_TREE
        top[x] = ymode;
      }
      memcpy(modes, top, 4 * sizeof(*top));
@@ -502,7 +498,7 @@ static const uint8_t

 // Paragraph 9.9

-static const uint8_t kBands[16 + 1] = {
+static const int kBands[16 + 1] = {
  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
  0  // extra entry as sentinel
 };
--- a/src/dec/vp8_dec.c
+++ b/src/dec/vp8_dec.c
@@ -13,12 +13,12 @@

 #include <stdlib.h>

-#include "src/dec/alphai_dec.h"
-#include "src/dec/vp8i_dec.h"
-#include "src/dec/vp8li_dec.h"
-#include "src/dec/webpi_dec.h"
-#include "src/utils/bit_reader_inl_utils.h"
-#include "src/utils/utils.h"
+#include "./alphai_dec.h"
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "./webpi_dec.h"
+#include "../utils/bit_reader_inl_utils.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------

--- a/src/dec/vp8_dec.h
+++ b/src/dec/vp8_dec.h
@@ -11,10 +11,10 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#ifndef WEBP_DEC_VP8_DEC_H_
-#define WEBP_DEC_VP8_DEC_H_
+#ifndef WEBP_WEBP_DECODE_VP8_H_
+#define WEBP_WEBP_DECODE_VP8_H_

-#include "src/webp/decode.h"
+#include "../webp/decode.h"

 #ifdef __cplusplus
 extern "C" {
@@ -33,7 +33,7 @@ extern "C" {
 //   /* customize io's functions (setup()/put()/teardown()) if needed. */
 //
 //   VP8Decoder* dec = VP8New();
-//   int ok = VP8Decode(dec, &io);
+//   bool ok = VP8Decode(dec);
 //   if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
 //   VP8Delete(dec);
 //   return ok;
@@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
 // Miscellaneous VP8/VP8L bitstream probing functions.

 // Returns true if the next 3 bytes in data contain the VP8 signature.
-WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
+WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);

 // Validates the VP8 data-header and retrieves basic header information viz
 // width and height. Returns 0 in case of formatting error. *width/*height
 // can be passed NULL.
-WEBP_EXTERN int VP8GetInfo(
+WEBP_EXTERN(int) VP8GetInfo(
    const uint8_t* data,
    size_t data_size,    // data available so far
    size_t chunk_size,   // total data size expected in the chunk
    int* const width, int* const height);

 // Returns true if the next byte(s) in data is a VP8L signature.
-WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
+WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);

 // Validates the VP8L data-header and retrieves basic header information viz
 // width, height and alpha. Returns 0 in case of formatting error.
 // width/height/has_alpha can be passed NULL.
-WEBP_EXTERN int VP8LGetInfo(
+WEBP_EXTERN(int) VP8LGetInfo(
    const uint8_t* data, size_t data_size,  // data available so far
    int* const width, int* const height, int* const has_alpha);

@@ -182,4 +182,4 @@ WEBP_EXTERN int VP8LGetInfo(
 }    // extern "C"
 #endif

-#endif  /* WEBP_DEC_VP8_DEC_H_ */
+#endif  /* WEBP_WEBP_DECODE_VP8_H_ */
--- a/src/dec/vp8i_dec.h
+++ b/src/dec/vp8i_dec.h
@@ -11,16 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#ifndef WEBP_DEC_VP8I_DEC_H_
-#define WEBP_DEC_VP8I_DEC_H_
+#ifndef WEBP_DEC_VP8I_H_
+#define WEBP_DEC_VP8I_H_

 #include <string.h>     // for memcpy()
-#include "src/dec/common_dec.h"
-#include "src/dec/vp8li_dec.h"
-#include "src/utils/bit_reader_utils.h"
-#include "src/utils/random_utils.h"
-#include "src/utils/thread_utils.h"
-#include "src/dsp/dsp.h"
+#include "./common_dec.h"
+#include "./vp8li_dec.h"
+#include "../utils/bit_reader_utils.h"
+#include "../utils/random_utils.h"
+#include "../utils/thread_utils.h"
+#include "../dsp/dsp.h"

 #ifdef __cplusplus
 extern "C" {
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 0
 #define DEC_MIN_VERSION 6
-#define DEC_REV_VERSION 1
+#define DEC_REV_VERSION 0

 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
@@ -57,6 +57,7 @@ extern "C" {
 //  '|' = left sample,   '-' = top sample,    '+' = top-left sample
 //  't' = extra top-right sample for 4x4 modes
 #define YUV_SIZE (BPS * 17 + BPS * 9)
+#define Y_SIZE   (BPS * 17)
 #define Y_OFF    (BPS * 1 + 8)
 #define U_OFF    (Y_OFF + BPS * 16 + BPS)
 #define V_OFF    (U_OFF + 16)
@@ -316,4 +317,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
 }    // extern "C"
 #endif

-#endif  /* WEBP_DEC_VP8I_DEC_H_ */
+#endif  /* WEBP_DEC_VP8I_H_ */
--- a/src/dec/vp8l_dec.c
+++ b/src/dec/vp8l_dec.c
@@ -14,22 +14,22 @@

 #include <stdlib.h>

-#include "src/dec/alphai_dec.h"
-#include "src/dec/vp8li_dec.h"
-#include "src/dsp/dsp.h"
-#include "src/dsp/lossless.h"
-#include "src/dsp/lossless_common.h"
-#include "src/dsp/yuv.h"
-#include "src/utils/endian_inl_utils.h"
-#include "src/utils/huffman_utils.h"
-#include "src/utils/utils.h"
+#include "./alphai_dec.h"
+#include "./vp8li_dec.h"
+#include "../dsp/dsp.h"
+#include "../dsp/lossless.h"
+#include "../dsp/lossless_common.h"
+#include "../dsp/yuv.h"
+#include "../utils/endian_inl_utils.h"
+#include "../utils/huffman_utils.h"
+#include "../utils/utils.h"

 #define NUM_ARGB_CACHE_ROWS          16

 static const int kCodeLengthLiterals = 16;
 static const int kCodeLengthRepeatCode = 16;
-static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
-static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
+static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };

 // -----------------------------------------------------------------------------
 //  Five Huffman codes are used at each meta code:
@@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // All values computed for 8-bit first level lookup with Mark Adler's tool:
 // http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
-static const uint16_t kTableSize[12] = {
+static const int kTableSize[12] = {
  FIXED_TABLE_SIZE + 654,
  FIXED_TABLE_SIZE + 656,
  FIXED_TABLE_SIZE + 658,
@@ -253,11 +253,11 @@ static int ReadHuffmanCodeLengths(
  int symbol;
  int max_symbol;
  int prev_code_len = DEFAULT_CODE_LENGTH;
-  HuffmanTables tables;
+  HuffmanCode table[1 << LENGTHS_TABLE_BITS];

-  if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
-      !VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS,
-                             code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
+  if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
+                             code_length_code_lengths,
+                             NUM_CODE_LENGTH_CODES)) {
    goto End;
  }

@@ -277,7 +277,7 @@ static int ReadHuffmanCodeLengths(
    int code_len;
    if (max_symbol-- == 0) break;
    VP8LFillBitWindow(br);
-    p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
+    p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
    VP8LSetBitPos(br, br->bit_pos_ + p->bits);
    code_len = p->value;
    if (code_len < kCodeLengthLiterals) {
@@ -300,7 +300,6 @@ static int ReadHuffmanCodeLengths(
  ok = 1;

 End:
-  VP8LHuffmanTablesDeallocate(&tables);
  if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
  return ok;
 }
@@ -308,8 +307,7 @@ static int ReadHuffmanCodeLengths(
 // 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
 // tree.
 static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
-                           int* const code_lengths,
-                           HuffmanTables* const table) {
+                           int* const code_lengths, HuffmanCode* const table) {
  int ok = 0;
  int size = 0;
  VP8LBitReader* const br = &dec->br_;
@@ -364,18 +362,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
  VP8LMetadata* const hdr = &dec->hdr_;
  uint32_t* huffman_image = NULL;
  HTreeGroup* htree_groups = NULL;
-  HuffmanTables* huffman_tables = &hdr->huffman_tables_;
+  HuffmanCode* huffman_tables = NULL;
+  HuffmanCode* next = NULL;
  int num_htree_groups = 1;
-  int num_htree_groups_max = 1;
  int max_alphabet_size = 0;
  int* code_lengths = NULL;
  const int table_size = kTableSize[color_cache_bits];
-  int* mapping = NULL;
-  int ok = 0;
-
-  // Check the table has been 0 initialized (through InitMetadata).
-  assert(huffman_tables->root.start == NULL);
-  assert(huffman_tables->curr_segment == NULL);

  if (allow_recursion && VP8LReadBits(br, 1)) {
    // use meta Huffman codes.
@@ -392,36 +384,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
      // The huffman data is stored in red and green bytes.
      const int group = (huffman_image[i] >> 8) & 0xffff;
      huffman_image[i] = group;
-      if (group >= num_htree_groups_max) {
-        num_htree_groups_max = group + 1;
+      if (group >= num_htree_groups) {
+        num_htree_groups = group + 1;
      }
    }
-    // Check the validity of num_htree_groups_max. If it seems too big, use a
-    // smaller value for later. This will prevent big memory allocations to end
-    // up with a bad bitstream anyway.
-    // The value of 1000 is totally arbitrary. We know that num_htree_groups_max
-    // is smaller than (1 << 16) and should be smaller than the number of pixels
-    // (though the format allows it to be bigger).
-    if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
-      // Create a mapping from the used indices to the minimal set of used
-      // values [0, num_htree_groups)
-      mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
-      if (mapping == NULL) {
-        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
-        goto Error;
-      }
-      // -1 means a value is unmapped, and therefore unused in the Huffman
-      // image.
-      memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
-      for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
-        // Get the current mapping for the group and remap the Huffman image.
-        int* const mapped_group = &mapping[huffman_image[i]];
-        if (*mapped_group == -1) *mapped_group = num_htree_groups++;
-        huffman_image[i] = *mapped_group;
-      }
-    } else {
-      num_htree_groups = num_htree_groups_max;
-    }
  }

  if (br->eos_) goto Error;
@@ -437,105 +403,88 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
    }
  }

+  huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
+                                                sizeof(*huffman_tables));
  htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
  code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
                                      sizeof(*code_lengths));

-  if (htree_groups == NULL || code_lengths == NULL ||
-      !VP8LHuffmanTablesAllocate(num_htree_groups * table_size,
-                                 huffman_tables)) {
+  if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
    goto Error;
  }

-  for (i = 0; i < num_htree_groups_max; ++i) {
-    // If the index "i" is unused in the Huffman image, just make sure the
-    // coefficients are valid but do not store them.
-    if (mapping != NULL && mapping[i] == -1) {
-      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
-        int alphabet_size = kAlphabetSize[j];
-        if (j == 0 && color_cache_bits > 0) {
-          alphabet_size += (1 << color_cache_bits);
-        }
-        // Passing in NULL so that nothing gets filled.
-        if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, NULL)) {
-          goto Error;
-        }
+  next = huffman_tables;
+  for (i = 0; i < num_htree_groups; ++i) {
+    HTreeGroup* const htree_group = &htree_groups[i];
+    HuffmanCode** const htrees = htree_group->htrees;
+    int size;
+    int total_size = 0;
+    int is_trivial_literal = 1;
+    int max_bits = 0;
+    for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+      int alphabet_size = kAlphabetSize[j];
+      htrees[j] = next;
+      if (j == 0 && color_cache_bits > 0) {
+        alphabet_size += 1 << color_cache_bits;
      }
-    } else {
-      HTreeGroup* const htree_group =
-          &htree_groups[(mapping == NULL) ? i : mapping[i]];
-      HuffmanCode** const htrees = htree_group->htrees;
-      int size;
-      int total_size = 0;
-      int is_trivial_literal = 1;
-      int max_bits = 0;
-      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
-        int alphabet_size = kAlphabetSize[j];
-        if (j == 0 && color_cache_bits > 0) {
-          alphabet_size += (1 << color_cache_bits);
-        }
-        size =
-            ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
-        htrees[j] = huffman_tables->curr_segment->curr_table;
-        if (size == 0) {
-          goto Error;
-        }
-        if (is_trivial_literal && kLiteralMap[j] == 1) {
-          is_trivial_literal = (htrees[j]->bits == 0);
-        }
-        total_size += htrees[j]->bits;
-        huffman_tables->curr_segment->curr_table += size;
-        if (j <= ALPHA) {
-          int local_max_bits = code_lengths[0];
-          int k;
-          for (k = 1; k < alphabet_size; ++k) {
-            if (code_lengths[k] > local_max_bits) {
-              local_max_bits = code_lengths[k];
-            }
+      size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
+      if (size == 0) {
+        goto Error;
+      }
+      if (is_trivial_literal && kLiteralMap[j] == 1) {
+        is_trivial_literal = (next->bits == 0);
+      }
+      total_size += next->bits;
+      next += size;
+      if (j <= ALPHA) {
+        int local_max_bits = code_lengths[0];
+        int k;
+        for (k = 1; k < alphabet_size; ++k) {
+          if (code_lengths[k] > local_max_bits) {
+            local_max_bits = code_lengths[k];
          }
-          max_bits += local_max_bits;
        }
+        max_bits += local_max_bits;
      }
-      htree_group->is_trivial_literal = is_trivial_literal;
-      htree_group->is_trivial_code = 0;
-      if (is_trivial_literal) {
-        const int red = htrees[RED][0].value;
-        const int blue = htrees[BLUE][0].value;
-        const int alpha = htrees[ALPHA][0].value;
-        htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
-        if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
-          htree_group->is_trivial_code = 1;
-          htree_group->literal_arb |= htrees[GREEN][0].value << 8;
-        }
-      }
-      htree_group->use_packed_table =
-          !htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
-      if (htree_group->use_packed_table) BuildPackedTable(htree_group);
    }
+    htree_group->is_trivial_literal = is_trivial_literal;
+    htree_group->is_trivial_code = 0;
+    if (is_trivial_literal) {
+      const int red = htrees[RED][0].value;
+      const int blue = htrees[BLUE][0].value;
+      const int alpha = htrees[ALPHA][0].value;
+      htree_group->literal_arb =
+          ((uint32_t)alpha << 24) | (red << 16) | blue;
+      if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
+        htree_group->is_trivial_code = 1;
+        htree_group->literal_arb |= htrees[GREEN][0].value << 8;
+      }
+    }
+    htree_group->use_packed_table = !htree_group->is_trivial_code &&
+                                    (max_bits < HUFFMAN_PACKED_BITS);
+    if (htree_group->use_packed_table) BuildPackedTable(htree_group);
  }
-  ok = 1;
+  WebPSafeFree(code_lengths);

-  // All OK. Finalize pointers.
+  // All OK. Finalize pointers and return.
  hdr->huffman_image_ = huffman_image;
  hdr->num_htree_groups_ = num_htree_groups;
  hdr->htree_groups_ = htree_groups;
+  hdr->huffman_tables_ = huffman_tables;
+  return 1;

 Error:
  WebPSafeFree(code_lengths);
-  WebPSafeFree(mapping);
-  if (!ok) {
-    WebPSafeFree(huffman_image);
-    VP8LHuffmanTablesDeallocate(huffman_tables);
-    VP8LHtreeGroupsFree(htree_groups);
-  }
-  return ok;
+  WebPSafeFree(huffman_image);
+  WebPSafeFree(huffman_tables);
+  VP8LHtreeGroupsFree(htree_groups);
+  return 0;
 }

 //------------------------------------------------------------------------------
 // Scaling.

-#if !defined(WEBP_REDUCE_SIZE)
 static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
  const int num_channels = 4;
  const int in_width = io->mb_w;
@@ -567,13 +516,10 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
                   out_width, out_height, 0, num_channels, work);
  return 1;
 }
-#endif   // WEBP_REDUCE_SIZE

 //------------------------------------------------------------------------------
 // Export to ARGB

-#if !defined(WEBP_REDUCE_SIZE)
-
 // We have special "export" function since we need to convert from BGRA
 static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
                  int rgba_stride, uint8_t* const rgba) {
@@ -615,8 +561,6 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
  return num_lines_out;
 }

-#endif   // WEBP_REDUCE_SIZE
-
 // Emit rows without any scaling.
 static int EmitRows(WEBP_CSP_MODE colorspace,
                    const uint8_t* row_in, int in_stride,
@@ -802,12 +746,9 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
      if (WebPIsRGBMode(output->colorspace)) {  // convert to RGBA
        const WebPRGBABuffer* const buf = &output->u.RGBA;
        uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
-        const int num_rows_out =
-#if !defined(WEBP_REDUCE_SIZE)
-         io->use_scaling ?
+        const int num_rows_out = io->use_scaling ?
            EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
                                 rgba, buf->stride) :
-#endif  // WEBP_REDUCE_SIZE
            EmitRows(output->colorspace, rows_data, in_stride,
                     io->mb_w, io->mb_h, rgba, buf->stride);
        // Update 'last_out_row_'.
@@ -934,11 +875,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
 #endif
        break;
      case 2:
-#if !defined(WORDS_BIGENDIAN)
        memcpy(&pattern, src, sizeof(uint16_t));
-#else
-        pattern = ((uint32_t)src[0] << 8) | src[1];
-#endif
 #if defined(__arm__) || defined(_M_ARM)
        pattern |= pattern << 16;
 #elif defined(WEBP_USE_MIPS_DSP_R2)
@@ -1075,13 +1012,12 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
      ok = 0;
      goto End;
    }
-    br->eos_ = VP8LIsEndOfStream(br);
+    assert(br->eos_ == VP8LIsEndOfStream(br));
  }
  // Process the remaining rows corresponding to last row-block.
  ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);

 End:
-  br->eos_ = VP8LIsEndOfStream(br);
  if (!ok || (br->eos_ && pos < end)) {
    ok = 0;
    dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
@@ -1154,12 +1090,11 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
    VP8LFillBitWindow(br);
    if (htree_group->use_packed_table) {
      code = ReadPackedSymbols(htree_group, br, src);
-      if (VP8LIsEndOfStream(br)) break;
      if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne;
    } else {
      code = ReadSymbol(htree_group->htrees[GREEN], br);
    }
-    if (VP8LIsEndOfStream(br)) break;
+    if (br->eos_) break;  // early out
    if (code < NUM_LITERAL_CODES) {  // Literal
      if (htree_group->is_trivial_literal) {
        *src = htree_group->literal_arb | (code << 8);
@@ -1169,7 +1104,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
        VP8LFillBitWindow(br);
        blue = ReadSymbol(htree_group->htrees[BLUE], br);
        alpha = ReadSymbol(htree_group->htrees[ALPHA], br);
-        if (VP8LIsEndOfStream(br)) break;
+        if (br->eos_) break;
        *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue;
      }
    AdvanceByOne:
@@ -1197,7 +1132,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
      VP8LFillBitWindow(br);
      dist_code = GetCopyDistance(dist_symbol, br);
      dist = PlaneCodeToDistance(width, dist_code);
-      if (VP8LIsEndOfStream(br)) break;
+      if (br->eos_) break;
      if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
        goto Error;
      } else {
@@ -1234,23 +1169,12 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
    } else {  // Not reached
      goto Error;
    }
+    assert(br->eos_ == VP8LIsEndOfStream(br));
  }

-  br->eos_ = VP8LIsEndOfStream(br);
-  // In incremental decoding:
-  // br->eos_ && src < src_last: if 'br' reached the end of the buffer and
-  // 'src_last' has not been reached yet, there is not enough data. 'dec' has to
-  // be reset until there is more data.
-  // !br->eos_ && src < src_last: this cannot happen as either the buffer is
-  // fully read, either enough has been read to reach 'src_last'.
-  // src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go
-  // beyond 'src_last' in case the image is cropped and an LZ77 goes further.
-  // The buffer might have been enough or there is some left. 'br->eos_' does
-  // not matter.
-  assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last);
-  if (dec->incremental_ && br->eos_ && src < src_last) {
+  if (dec->incremental_ && br->eos_ && src < src_end) {
    RestoreState(dec);
-  } else if ((dec->incremental_ && src >= src_last) || !br->eos_) {
+  } else if (!br->eos_) {
    // Process the remaining rows corresponding to last row-block.
    if (process_func != NULL) {
      process_func(dec, row > last_row ? last_row : row);
@@ -1369,7 +1293,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
  assert(hdr != NULL);

  WebPSafeFree(hdr->huffman_image_);
-  VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
+  WebPSafeFree(hdr->huffman_tables_);
  VP8LHtreeGroupsFree(hdr->htree_groups_);
  VP8LColorCacheClear(&hdr->color_cache_);
  VP8LColorCacheClear(&hdr->saved_color_cache_);
@@ -1685,7 +1609,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
  // Sanity checks.
  if (dec == NULL) return 0;

-  assert(dec->hdr_.huffman_tables_.root.start != NULL);
+  assert(dec->hdr_.huffman_tables_ != NULL);
  assert(dec->hdr_.htree_groups_ != NULL);
  assert(dec->hdr_.num_htree_groups_ > 0);

@@ -1706,19 +1630,12 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {

    if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;

-#if !defined(WEBP_REDUCE_SIZE)
    if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;

    if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
      // need the alpha-multiply functions for premultiplied output or rescaling
      WebPInitAlphaProcessing();
    }
-#else
-    if (io->use_scaling) {
-      dec->status_ = VP8_STATUS_INVALID_PARAM;
-      goto Err;
-    }
-#endif
    if (!WebPIsRGBMode(dec->output_->colorspace)) {
      WebPInitConvertARGBToYUV();
      if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
--- a/src/dec/vp8li_dec.h
+++ b/src/dec/vp8li_dec.h
@@ -12,14 +12,14 @@
 // Author: Skal (pascal.massimino@gmail.com)
 //         Vikas Arora(vikaas.arora@gmail.com)

-#ifndef WEBP_DEC_VP8LI_DEC_H_
-#define WEBP_DEC_VP8LI_DEC_H_
+#ifndef WEBP_DEC_VP8LI_H_
+#define WEBP_DEC_VP8LI_H_

 #include <string.h>     // for memcpy()
-#include "src/dec/webpi_dec.h"
-#include "src/utils/bit_reader_utils.h"
-#include "src/utils/color_cache_utils.h"
-#include "src/utils/huffman_utils.h"
+#include "./webpi_dec.h"
+#include "../utils/bit_reader_utils.h"
+#include "../utils/color_cache_utils.h"
+#include "../utils/huffman_utils.h"

 #ifdef __cplusplus
 extern "C" {
@@ -51,7 +51,7 @@ typedef struct {
  uint32_t       *huffman_image_;
  int             num_htree_groups_;
  HTreeGroup     *htree_groups_;
-  HuffmanTables   huffman_tables_;
+  HuffmanCode    *huffman_tables_;
 } VP8LMetadata;

 typedef struct VP8LDecoder VP8LDecoder;
@@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
 }    // extern "C"
 #endif

-#endif  /* WEBP_DEC_VP8LI_DEC_H_ */
+#endif  /* WEBP_DEC_VP8LI_H_ */
--- a/src/dec/webp_dec.c
+++ b/src/dec/webp_dec.c
@@ -13,11 +13,11 @@

 #include <stdlib.h>

-#include "src/dec/vp8i_dec.h"
-#include "src/dec/vp8li_dec.h"
-#include "src/dec/webpi_dec.h"
-#include "src/utils/utils.h"
-#include "src/webp/mux_types.h"  // ALPHA_FLAG
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "./webpi_dec.h"
+#include "../utils/utils.h"
+#include "../webp/mux_types.h"  // ALPHA_FLAG

 //------------------------------------------------------------------------------
 // RIFF layout is:
@@ -421,9 +421,7 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
                                NULL, NULL, NULL, &has_animation,
                                NULL, headers);
  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
-    // The WebPDemux API + libwebp can be used to decode individual
-    // uncomposited frames or the WebPAnimDecoder can be used to fully
-    // reconstruct them (see webp/demux.h).
+    // TODO(jzern): full support of animation frames will require API additions.
    if (has_animation) {
      status = VP8_STATUS_UNSUPPORTED_FEATURE;
    }
--- a/src/dec/webpi_dec.h
+++ b/src/dec/webpi_dec.h
@@ -11,15 +11,15 @@
 //
 // Author: somnath@google.com (Somnath Banerjee)

-#ifndef WEBP_DEC_WEBPI_DEC_H_
-#define WEBP_DEC_WEBPI_DEC_H_
+#ifndef WEBP_DEC_WEBPI_H_
+#define WEBP_DEC_WEBPI_H_

 #ifdef __cplusplus
 extern "C" {
 #endif

-#include "src/utils/rescaler_utils.h"
-#include "src/dec/vp8_dec.h"
+#include "../utils/rescaler_utils.h"
+#include "./vp8_dec.h"

 //------------------------------------------------------------------------------
 // WebPDecParams: Decoding output parameters. Transient internal object.
@@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
 }    // extern "C"
 #endif

-#endif  /* WEBP_DEC_WEBPI_DEC_H_ */
+#endif  /* WEBP_DEC_WEBPI_H_ */
--- a/src/demux/Makefile.am
+++ b/src/demux/Makefile.am
@@ -1,4 +1,3 @@
-AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
 lib_LTLIBRARIES = libwebpdemux.la

 libwebpdemux_la_SOURCES =
@@ -10,6 +9,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
 libwebpdemuxinclude_HEADERS += ../webp/types.h

 libwebpdemux_la_LIBADD = ../libwebp.la
-libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:3:0
+libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:2:0
 libwebpdemuxincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebpdemux.pc
--- a/src/demux/anim_decode.c
+++ b/src/demux/anim_decode.c
@@ -11,15 +11,15 @@
 //

 #ifdef HAVE_CONFIG_H
-#include "src/webp/config.h"
+#include "../webp/config.h"
 #endif

 #include <assert.h>
 #include <string.h>

-#include "src/utils/utils.h"
-#include "src/webp/decode.h"
-#include "src/webp/demux.h"
+#include "../utils/utils.h"
+#include "../webp/decode.h"
+#include "../webp/demux.h"

 #define NUM_CHANNELS 4

--- a/src/demux/demux.c
+++ b/src/demux/demux.c
@@ -11,21 +11,21 @@
 //

 #ifdef HAVE_CONFIG_H
-#include "src/webp/config.h"
+#include "../webp/config.h"
 #endif

 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

-#include "src/utils/utils.h"
-#include "src/webp/decode.h"     // WebPGetFeatures
-#include "src/webp/demux.h"
-#include "src/webp/format_constants.h"
+#include "../utils/utils.h"
+#include "../webp/decode.h"     // WebPGetFeatures
+#include "../webp/demux.h"
+#include "../webp/format_constants.h"

 #define DMUX_MAJ_VERSION 0
 #define DMUX_MIN_VERSION 3
-#define DMUX_REV_VERSION 3
+#define DMUX_REV_VERSION 2

 typedef struct {
  size_t start_;        // start location of the data
@@ -205,14 +205,12 @@ static void SetFrameInfo(size_t start_offset, size_t size,
  frame->complete_ = complete;
 }

-// Store image bearing chunks to 'frame'. 'min_size' is an optional size
-// requirement, it may be zero.
+// Store image bearing chunks to 'frame'.
 static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
                              MemBuffer* const mem, Frame* const frame) {
  int alpha_chunks = 0;
  int image_chunks = 0;
-  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
-              MemDataSize(mem) < min_size);
+  int done = (MemDataSize(mem) < min_size);
  ParseStatus status = PARSE_OK;

  if (done) return PARSE_NEED_MORE_DATA;
@@ -403,9 +401,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
  frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
  if (frame == NULL) return PARSE_ERROR;

-  // For the single image case we allow parsing of a partial frame, so no
-  // minimum size is imposed here.
-  status = StoreFrame(1, 0, &dmux->mem_, frame);
+  // For the single image case we allow parsing of a partial frame, but we need
+  // at least CHUNK_HEADER_SIZE for parsing.
+  status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
  if (status != PARSE_ERROR) {
    const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
    // Clear any alpha when the alpha flag is missing.
--- a/src/demux/libwebpdemux.rc
+++ b/src/demux/libwebpdemux.rc
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 0,3,0,3
- PRODUCTVERSION 0,3,0,3
+ FILEVERSION 0,3,0,2
+ PRODUCTVERSION 0,3,0,2
 FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
 FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
        BEGIN
            VALUE "CompanyName", "Google, Inc."
            VALUE "FileDescription", "libwebpdemux DLL"
-            VALUE "FileVersion", "0.3.3"
+            VALUE "FileVersion", "0.3.2"
            VALUE "InternalName", "libwebpdemux.dll"
            VALUE "LegalCopyright", "Copyright (C) 2017"
            VALUE "OriginalFilename", "libwebpdemux.dll"
            VALUE "ProductName", "WebP Image Demuxer"
-            VALUE "ProductVersion", "0.3.3"
+            VALUE "ProductVersion", "0.3.2"
        END
    END
    BLOCK "VarFileInfo"
--- a/src/dsp/Makefile.am
+++ b/src/dsp/Makefile.am
@@ -1,15 +1,8 @@
-AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
-noinst_LTLIBRARIES =
-noinst_LTLIBRARIES += libwebpdsp.la
-noinst_LTLIBRARIES += libwebpdsp_avx2.la
-noinst_LTLIBRARIES += libwebpdsp_sse2.la
-noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
-noinst_LTLIBRARIES += libwebpdsp_sse41.la
-noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
-noinst_LTLIBRARIES += libwebpdsp_neon.la
-noinst_LTLIBRARIES += libwebpdspdecode_neon.la
-noinst_LTLIBRARIES += libwebpdsp_msa.la
-noinst_LTLIBRARIES += libwebpdspdecode_msa.la
+noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
+noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
+noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
+noinst_LTLIBRARIES += libwebpdsp_neon.la libwebpdspdecode_neon.la
+noinst_LTLIBRARIES += libwebpdsp_msa.la libwebpdspdecode_msa.la

 if BUILD_LIBWEBPDECODER
  noinst_LTLIBRARIES += libwebpdspdecode.la
@@ -46,6 +39,8 @@ COMMON_SOURCES += yuv_mips32.c
 COMMON_SOURCES += yuv_mips_dsp_r2.c

 ENC_SOURCES =
+ENC_SOURCES += argb.c
+ENC_SOURCES += argb_mips_dsp_r2.c
 ENC_SOURCES += cost.c
 ENC_SOURCES += cost_mips32.c
 ENC_SOURCES += cost_mips_dsp_r2.c
@@ -55,7 +50,6 @@ ENC_SOURCES += enc_mips_dsp_r2.c
 ENC_SOURCES += lossless_enc.c
 ENC_SOURCES += lossless_enc_mips32.c
 ENC_SOURCES += lossless_enc_mips_dsp_r2.c
-ENC_SOURCES += ssim.c

 libwebpdsp_avx2_la_SOURCES =
 libwebpdsp_avx2_la_SOURCES += enc_avx2.c
@@ -87,7 +81,6 @@ libwebpdspdecode_neon_la_SOURCES += lossless_neon.c
 libwebpdspdecode_neon_la_SOURCES += neon.h
 libwebpdspdecode_neon_la_SOURCES += rescaler_neon.c
 libwebpdspdecode_neon_la_SOURCES += upsampling_neon.c
-libwebpdspdecode_neon_la_SOURCES += yuv_neon.c
 libwebpdspdecode_neon_la_CPPFLAGS = $(libwebpdsp_neon_la_CPPFLAGS)
 libwebpdspdecode_neon_la_CFLAGS = $(libwebpdsp_neon_la_CFLAGS)

@@ -102,10 +95,10 @@ libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_msa_la_CPPFLAGS)
 libwebpdspdecode_msa_la_CFLAGS = $(libwebpdsp_msa_la_CFLAGS)

 libwebpdsp_sse2_la_SOURCES =
+libwebpdsp_sse2_la_SOURCES += argb_sse2.c
 libwebpdsp_sse2_la_SOURCES += cost_sse2.c
 libwebpdsp_sse2_la_SOURCES += enc_sse2.c
 libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
-libwebpdsp_sse2_la_SOURCES += ssim_sse2.c
 libwebpdsp_sse2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
 libwebpdsp_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
 libwebpdsp_sse2_la_LIBADD = libwebpdspdecode_sse2.la
@@ -142,8 +135,7 @@ libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
 libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
 libwebpdsp_la_LDFLAGS = -lm
 libwebpdsp_la_LIBADD =
-libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
-libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
+libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
 libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
 libwebpdsp_la_LIBADD += libwebpdsp_neon.la
 libwebpdsp_la_LIBADD += libwebpdsp_msa.la
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -12,13 +12,10 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 // Tables can be faster on some platform but incur some extra binary size (~2k).
-#if !defined(USE_TABLES_FOR_ALPHA_MULT)
-#define USE_TABLES_FOR_ALPHA_MULT 0   // ALTERNATE_CODE
-#endif
-
+// #define USE_TABLES_FOR_ALPHA_MULT

 // -----------------------------------------------------------------------------

@@ -32,7 +29,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
  return v;
 }

-#if (USE_TABLES_FOR_ALPHA_MULT == 1)
+#ifdef USE_TABLES_FOR_ALPHA_MULT

 static const uint32_t kMultTables[2][256] = {
  {    // (255u << MFIX) / alpha
@@ -135,9 +132,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
  return inverse ? (255u << MFIX) / a : a * KINV_255;
 }

-#endif  // USE_TABLES_FOR_ALPHA_MULT
+#endif    // USE_TABLES_FOR_ALPHA_MULT

-void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t argb = ptr[x];
@@ -157,8 +154,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
  }
 }

-void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
-                   int width, int inverse) {
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
+                  int width, int inverse) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t a = alpha[x];
@@ -220,9 +217,8 @@ void WebPMultRows(uint8_t* ptr, int stride,
 #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
 #endif

-#if !WEBP_NEON_OMIT_C_CODE
-static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
-                                 int w, int h, int stride) {
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+                               int w, int h, int stride) {
  while (h-- > 0) {
    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@@ -239,7 +235,6 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
    rgba += stride;
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE
 #undef MULTIPLIER
 #undef PREMULTIPLY

@@ -259,9 +254,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
  return (x * m) >> 16;
 }

-static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
-                                                 int w, int h, int stride,
-                                                 int rg_byte_pos /* 0 or 1 */) {
+static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
+                                               int w, int h, int stride,
+                                               int rg_byte_pos /* 0 or 1 */) {
  while (h-- > 0) {
    int i;
    for (i = 0; i < w; ++i) {
@@ -280,16 +275,15 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
 }
 #undef MULTIPLIER

-static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
-                                     int w, int h, int stride) {
-#if (WEBP_SWAP_16BIT_CSP == 1)
-  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
+static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
+                                   int w, int h, int stride) {
+#ifdef WEBP_SWAP_16BIT_CSP
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
 #else
-  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
 #endif
 }

-#if !WEBP_NEON_OMIT_C_CODE
 static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
                           int width, int height,
                           uint8_t* dst, int dst_stride) {
@@ -344,46 +338,6 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
  int i;
  for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE
-
-//------------------------------------------------------------------------------
-
-static int HasAlpha8b_C(const uint8_t* src, int length) {
-  while (length-- > 0) if (*src++ != 0xff) return 1;
-  return 0;
-}
-
-static int HasAlpha32b_C(const uint8_t* src, int length) {
-  int x;
-  for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
-  return 0;
-}
-
-//------------------------------------------------------------------------------
-// Simple channel manipulations.
-
-static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
-  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
-}
-
-#ifdef WORDS_BIGENDIAN
-static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                       const uint8_t* b, int len, uint32_t* out) {
-  int i;
-  for (i = 0; i < len; ++i) {
-    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
-  }
-}
-#endif
-
-static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                      int len, int step, uint32_t* out) {
-  int i, offset = 0;
-  for (i = 0; i < len; ++i) {
-    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
-    offset += step;
-  }
-}

 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
 void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
@@ -391,15 +345,6 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
 void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
 int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
 void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
-#ifdef WORDS_BIGENDIAN
-void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                     const uint8_t* b, int, uint32_t*);
-#endif
-void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                    int len, int step, uint32_t* out);
-
-int (*WebPHasAlpha8b)(const uint8_t* src, int length);
-int (*WebPHasAlpha32b)(const uint8_t* src, int length);

 //------------------------------------------------------------------------------
 // Init function
@@ -415,24 +360,15 @@ static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
  if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;

-  WebPMultARGBRow = WebPMultARGBRow_C;
-  WebPMultRow = WebPMultRow_C;
-  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
+  WebPMultARGBRow = WebPMultARGBRowC;
+  WebPMultRow = WebPMultRowC;
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;

-#ifdef WORDS_BIGENDIAN
-  WebPPackARGB = PackARGB_C;
-#endif
-  WebPPackRGB = PackRGB_C;
-#if !WEBP_NEON_OMIT_C_CODE
-  WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
  WebPDispatchAlpha = DispatchAlpha_C;
  WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
  WebPExtractAlpha = ExtractAlpha_C;
  WebPExtractGreen = ExtractGreen_C;
-#endif
-
-  WebPHasAlpha8b = HasAlpha8b_C;
-  WebPHasAlpha32b = HasAlpha32b_C;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  if (VP8GetCPUInfo != NULL) {
@@ -446,34 +382,16 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
 #endif
    }
 #endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      WebPInitAlphaProcessingNEON();
+    }
+#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
    if (VP8GetCPUInfo(kMIPSdspR2)) {
      WebPInitAlphaProcessingMIPSdspR2();
    }
 #endif
  }
-
-#if defined(WEBP_USE_NEON)
-  if (WEBP_NEON_OMIT_C_CODE ||
-      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-    WebPInitAlphaProcessingNEON();
-  }
-#endif
-
-  assert(WebPMultARGBRow != NULL);
-  assert(WebPMultRow != NULL);
-  assert(WebPApplyAlphaMultiply != NULL);
-  assert(WebPApplyAlphaMultiply4444 != NULL);
-  assert(WebPDispatchAlpha != NULL);
-  assert(WebPDispatchAlphaToGreen != NULL);
-  assert(WebPExtractAlpha != NULL);
-  assert(WebPExtractGreen != NULL);
-#ifdef WORDS_BIGENDIAN
-  assert(WebPPackARGB != NULL);
-#endif
-  assert(WebPPackRGB != NULL);
-  assert(WebPHasAlpha8b != NULL);
-  assert(WebPHasAlpha32b != NULL);
-
  alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
 }
--- a/src/dsp/alpha_processing_mips_dsp_r2.c
+++ b/src/dsp/alpha_processing_mips_dsp_r2.c
@@ -12,13 +12,13 @@
 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
 //            Djordje Pesut  (djordje.pesut@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS_DSP_R2)

-static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
-                                   int width, int height,
-                                   uint8_t* dst, int dst_stride) {
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
+                         int width, int height,
+                         uint8_t* dst, int dst_stride) {
  uint32_t alpha_mask = 0xffffffff;
  int i, j, temp0;

@@ -79,8 +79,7 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
  return (alpha_mask != 0xff);
 }

-static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
-                                  int inverse) {
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
  int x;
  const uint32_t c_00ffffff = 0x00ffffffu;
  const uint32_t c_ff000000 = 0xff000000u;
@@ -125,100 +124,14 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
  }
 }

-#ifdef WORDS_BIGENDIAN
-static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
-                               const uint8_t* g, const uint8_t* b, int len,
-                               uint32_t* out) {
-  int temp0, temp1, temp2, temp3, offset;
-  const int rest = len & 1;
-  const uint32_t* const loop_end = out + len - rest;
-  const int step = 4;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
-}
-#endif  // WORDS_BIGENDIAN
-
-static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
-                              const uint8_t* b, int len, int step,
-                              uint32_t* out) {
-  int temp0, temp1, temp2, offset;
-  const int rest = len & 1;
-  const int a = 0xff;
-  const uint32_t* const loop_end = out + len - rest;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
-}
-
 //------------------------------------------------------------------------------
 // Entry point

 extern void WebPInitAlphaProcessingMIPSdspR2(void);

 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
-  WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
-  WebPMultARGBRow = MultARGBRow_MIPSdspR2;
-#ifdef WORDS_BIGENDIAN
-  WebPPackARGB = PackARGB_MIPSdspR2;
-#endif
-  WebPPackRGB = PackRGB_MIPSdspR2;
+  WebPDispatchAlpha = DispatchAlpha;
+  WebPMultARGBRow = MultARGBRow;
 }

 #else  // !WEBP_USE_MIPS_DSP_R2
--- a/src/dsp/alpha_processing_neon.c
+++ b/src/dsp/alpha_processing_neon.c
@@ -11,11 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_NEON)

-#include "src/dsp/neon.h"
+#include "./neon.h"

 //------------------------------------------------------------------------------

@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
 static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
                              int width, int height,
                              uint8_t* dst, int dst_stride) {
-  uint32_t alpha_mask = 0xffu;
+  uint32_t alpha_mask = 0xffffffffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@@ -107,7 +107,6 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
    dst += dst_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
-  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask != 0xffffffffu);
@@ -135,7 +134,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
 static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
                             int width, int height,
                             uint8_t* alpha, int alpha_stride) {
-  uint32_t alpha_mask = 0xffu;
+  uint32_t alpha_mask = 0xffffffffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@@ -157,7 +156,6 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
    alpha += alpha_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
-  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask == 0xffffffffu);
--- a/src/dsp/alpha_processing_sse2.c
+++ b/src/dsp/alpha_processing_sse2.c
@@ -11,16 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>

 //------------------------------------------------------------------------------

-static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
-                              int width, int height,
-                              uint8_t* dst, int dst_stride) {
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
+                         int width, int height,
+                         uint8_t* dst, int dst_stride) {
  // alpha_and stores an 'and' operation of all the alpha[] values. The final
  // value is not 0xff if any of the alpha[] is not equal to 0xff.
  uint32_t alpha_and = 0xff;
@@ -72,9 +72,9 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
  return (alpha_and != 0xff);
 }

-static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
-                                      int width, int height,
-                                      uint32_t* dst, int dst_stride) {
+static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
+                                 int width, int height,
+                                 uint32_t* dst, int dst_stride) {
  int i, j;
  const __m128i zero = _mm_setzero_si128();
  const int limit = width & ~15;
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
  }
 }

-static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
-                             int width, int height,
-                             uint8_t* alpha, int alpha_stride) {
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
  // alpha_and stores an 'and' operation of all the alpha[] values. The final
  // value is not 0xff if any of the alpha[] is not equal to 0xff.
  uint32_t alpha_and = 0xff;
@@ -210,61 +210,6 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
 #undef MULTIPLIER
 #undef PREMULTIPLY

-//------------------------------------------------------------------------------
-// Alpha detection
-
-static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
-  const __m128i all_0xff = _mm_set1_epi8(0xff);
-  int i = 0;
-  for (; i + 16 <= length; i += 16) {
-    const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
-    const int mask = _mm_movemask_epi8(bits);
-    if (mask != 0xffff) return 1;
-  }
-  for (; i < length; ++i) if (src[i] != 0xff) return 1;
-  return 0;
-}
-
-static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
-  const __m128i alpha_mask = _mm_set1_epi32(0xff);
-  const __m128i all_0xff = _mm_set1_epi8(0xff);
-  int i = 0;
-  // We don't know if we can access the last 3 bytes after the last alpha
-  // value 'src[4 * length - 4]' (because we don't know if alpha is the first
-  // or the last byte of the quadruplet). Hence the '-3' protection below.
-  length = length * 4 - 3;   // size in bytes
-  for (; i + 64 <= length; i += 64) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
-    const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
-    const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
-    const __m128i b0 = _mm_and_si128(a0, alpha_mask);
-    const __m128i b1 = _mm_and_si128(a1, alpha_mask);
-    const __m128i b2 = _mm_and_si128(a2, alpha_mask);
-    const __m128i b3 = _mm_and_si128(a3, alpha_mask);
-    const __m128i c0 = _mm_packs_epi32(b0, b1);
-    const __m128i c1 = _mm_packs_epi32(b2, b3);
-    const __m128i d  = _mm_packus_epi16(c0, c1);
-    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
-    const int mask = _mm_movemask_epi8(bits);
-    if (mask != 0xffff) return 1;
-  }
-  for (; i + 32 <= length; i += 32) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
-    const __m128i b0 = _mm_and_si128(a0, alpha_mask);
-    const __m128i b1 = _mm_and_si128(a1, alpha_mask);
-    const __m128i c  = _mm_packs_epi32(b0, b1);
-    const __m128i d  = _mm_packus_epi16(c, c);
-    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
-    const int mask = _mm_movemask_epi8(bits);
-    if (mask != 0xffff) return 1;
-  }
-  for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
-  return 0;
-}
-
 // -----------------------------------------------------------------------------
 // Apply alpha value to rows

@@ -293,7 +238,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
    }
  }
  width -= x;
-  if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
+  if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
 }

 static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
@@ -316,7 +261,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
    }
  }
  width -= x;
-  if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
+  if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
 }

 //------------------------------------------------------------------------------
@@ -328,12 +273,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
  WebPMultARGBRow = MultARGBRow_SSE2;
  WebPMultRow = MultRow_SSE2;
  WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
-  WebPDispatchAlpha = DispatchAlpha_SSE2;
-  WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
-  WebPExtractAlpha = ExtractAlpha_SSE2;
-
-  WebPHasAlpha8b = HasAlpha8b_SSE2;
-  WebPHasAlpha32b = HasAlpha32b_SSE2;
+  WebPDispatchAlpha = DispatchAlpha;
+  WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
+  WebPExtractAlpha = ExtractAlpha;
 }

 #else  // !WEBP_USE_SSE2
--- a/src/dsp/alpha_processing_sse41.c
+++ b/src/dsp/alpha_processing_sse41.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_SSE41)

@@ -19,9 +19,9 @@

 //------------------------------------------------------------------------------

-static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
-                              int width, int height,
-                              uint8_t* alpha, int alpha_stride) {
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
  // alpha_and stores an 'and' operation of all the alpha[] values. The final
  // value is not 0xff if any of the alpha[] is not equal to 0xff.
  uint32_t alpha_and = 0xff;
@@ -82,7 +82,7 @@ static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
 extern void WebPInitAlphaProcessingSSE41(void);

 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
-  WebPExtractAlpha = ExtractAlpha_SSE41;
+  WebPExtractAlpha = ExtractAlpha;
 }

 #else  // !WEBP_USE_SSE41
--- a/src/dsp/argb.c
+++ b/src/dsp/argb.c
@@ -0,0 +1,68 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions.
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
+}
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  int i;
+  for (i = 0; i < len; ++i) {
+    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+  }
+}
+
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                    int len, int step, uint32_t* out) {
+  int i, offset = 0;
+  for (i = 0; i < len; ++i) {
+    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
+    offset += step;
+  }
+}
+
+void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
+                    const uint8_t*, int, uint32_t*);
+void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
+                   int, int, uint32_t*);
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);
+extern void VP8EncDspARGBInitSSE2(void);
+
+static volatile VP8CPUInfo argb_last_cpuinfo_used =
+    (VP8CPUInfo)&argb_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
+  if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  VP8PackARGB = PackARGB;
+  VP8PackRGB = PackRGB;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspARGBInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8EncDspARGBInitMIPSdspR2();
+    }
+#endif
+  }
+  argb_last_cpuinfo_used = VP8GetCPUInfo;
+}
--- a/src/dsp/argb_mips_dsp_r2.c
+++ b/src/dsp/argb_mips_dsp_r2.c
@@ -0,0 +1,110 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (mips version).
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  int temp0, temp1, temp2, temp3, offset;
+  const int rest = len & 1;
+  const uint32_t* const loop_end = out + len - rest;
+  const int step = 4;
+  __asm__ volatile (
+    "xor          %[offset],   %[offset], %[offset]    \n\t"
+    "beq          %[loop_end], %[out],    0f           \n\t"
+  "2:                                                  \n\t"
+    "lbux         %[temp0],    %[offset](%[a])         \n\t"
+    "lbux         %[temp1],    %[offset](%[r])         \n\t"
+    "lbux         %[temp2],    %[offset](%[g])         \n\t"
+    "lbux         %[temp3],    %[offset](%[b])         \n\t"
+    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+    "addiu        %[out],      %[out],    4            \n\t"
+    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+    "sw           %[temp0],    -4(%[out])              \n\t"
+    "addu         %[offset],   %[offset], %[step]      \n\t"
+    "bne          %[loop_end], %[out],    2b           \n\t"
+  "0:                                                  \n\t"
+    "beq          %[rest],     $zero,     1f           \n\t"
+    "lbux         %[temp0],    %[offset](%[a])         \n\t"
+    "lbux         %[temp1],    %[offset](%[r])         \n\t"
+    "lbux         %[temp2],    %[offset](%[g])         \n\t"
+    "lbux         %[temp3],    %[offset](%[b])         \n\t"
+    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+    "sw           %[temp0],    0(%[out])               \n\t"
+  "1:                                                  \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
+    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      [loop_end]"r"(loop_end), [rest]"r"(rest)
+    : "memory"
+  );
+}
+
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                    int len, int step, uint32_t* out) {
+  int temp0, temp1, temp2, offset;
+  const int rest = len & 1;
+  const int a = 0xff;
+  const uint32_t* const loop_end = out + len - rest;
+  __asm__ volatile (
+    "xor          %[offset],   %[offset], %[offset]    \n\t"
+    "beq          %[loop_end], %[out],    0f           \n\t"
+  "2:                                                  \n\t"
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "addiu        %[out],      %[out],    4            \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    -4(%[out])              \n\t"
+    "addu         %[offset],   %[offset], %[step]      \n\t"
+    "bne          %[loop_end], %[out],    2b           \n\t"
+  "0:                                                  \n\t"
+    "beq          %[rest],     $zero,     1f           \n\t"
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    0(%[out])               \n\t"
+  "1:                                                  \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [offset]"=&r"(offset), [out]"+&r"(out)
+    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      [loop_end]"r"(loop_end), [rest]"r"(rest)
+    : "memory"
+  );
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
+  VP8PackARGB = PackARGB;
+  VP8PackRGB = PackRGB;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
--- a/src/dsp/argb_sse2.c
+++ b/src/dsp/argb_sse2.c
@@ -0,0 +1,67 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (SSE2 version).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
+}
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  if (g == r + 1) {  // RGBA input order. Need to swap R and B.
+    int i = 0;
+    const int len_max = len & ~3;  // max length processed in main loop
+    const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
+    assert(b == r + 2);
+    assert(a == r + 3);
+    for (; i < len_max; i += 4) {
+      const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
+      const __m128i B = _mm_and_si128(A, red_blue_mask);     // R 0 B 0
+      const __m128i C = _mm_andnot_si128(red_blue_mask, A);  // 0 G 0 A
+      const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i F = _mm_or_si128(E, C);
+      _mm_storeu_si128((__m128i*)(out + i), F);
+    }
+    for (; i < len; ++i) {
+      out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+    }
+  } else {
+    assert(g == b + 1);
+    assert(r == b + 2);
+    assert(a == b + 3);
+    memcpy(out, b, len * 4);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
+  VP8PackARGB = PackARGB;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
+
+#endif  // WEBP_USE_SSE2
--- a/src/dsp/cost.c
+++ b/src/dsp/cost.c
@@ -9,8 +9,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
-#include "src/enc/cost_enc.h"
+#include "./dsp.h"
+#include "../enc/cost_enc.h"

 //------------------------------------------------------------------------------
 // Boolean-cost cost table
@@ -319,7 +319,7 @@ const uint8_t VP8EncBands[16 + 1] = {
 //------------------------------------------------------------------------------
 // Mode costs

-static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
  int n = res->first;
  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
  const int p0 = res->prob[n][ctx0][0];
@@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
  return cost;
 }

-static void SetResidualCoeffs_C(const int16_t* const coeffs,
-                                VP8Residual* const res) {
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
  int n;
  res->last = -1;
  assert(res->first == 0 || coeffs[0] == 0);
@@ -384,8 +384,8 @@ static volatile VP8CPUInfo cost_last_cpuinfo_used =
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
  if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;

-  VP8GetResidualCost = GetResidualCost_C;
-  VP8SetResidualCoeffs = SetResidualCoeffs_C;
+  VP8GetResidualCost = GetResidualCost;
+  VP8SetResidualCoeffs = SetResidualCoeffs;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  if (VP8GetCPUInfo != NULL) {
--- a/src/dsp/cost_mips32.c
+++ b/src/dsp/cost_mips32.c
@@ -9,13 +9,13 @@
 //
 // Author: Djordje Pesut (djordje.pesut@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS32)

-#include "src/enc/cost_enc.h"
+#include "../enc/cost_enc.h"

-static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
  int temp0, temp1;
  int v_reg, ctx_reg;
  int n = res->first;
@@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
  return cost;
 }

-static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
-                                     VP8Residual* const res) {
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
  const int16_t* p_coeffs = (int16_t*)coeffs;
  int temp0, temp1, temp2, n, n1;
  assert(res->first == 0 || coeffs[0] == 0);
@@ -143,8 +143,8 @@ static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
 extern void VP8EncDspCostInitMIPS32(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
-  VP8GetResidualCost = GetResidualCost_MIPS32;
-  VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
+  VP8GetResidualCost = GetResidualCost;
+  VP8SetResidualCoeffs = SetResidualCoeffs;
 }

 #else  // !WEBP_USE_MIPS32
--- a/src/dsp/cost_mips_dsp_r2.c
+++ b/src/dsp/cost_mips_dsp_r2.c
@@ -9,13 +9,13 @@
 //
 // Author: Djordje Pesut (djordje.pesut@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS_DSP_R2)

-#include "src/enc/cost_enc.h"
+#include "../enc/cost_enc.h"

-static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
  int temp0, temp1;
  int v_reg, ctx_reg;
  int n = res->first;
@@ -97,7 +97,7 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
 extern void VP8EncDspCostInitMIPSdspR2(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
-  VP8GetResidualCost = GetResidualCost_MIPSdspR2;
+  VP8GetResidualCost = GetResidualCost;
 }

 #else  // !WEBP_USE_MIPS_DSP_R2
--- a/src/dsp/cost_sse2.c
+++ b/src/dsp/cost_sse2.c
@@ -11,19 +11,19 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>

-#include "src/enc/cost_enc.h"
-#include "src/enc/vp8i_enc.h"
-#include "src/utils/utils.h"
+#include "../enc/cost_enc.h"
+#include "../enc/vp8i_enc.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------

-static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
-                                   VP8Residual* const res) {
+static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
+                                  VP8Residual* const res) {
  const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
  const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
  // Use SSE2 to compare 16 values with a single instruction.
@@ -42,7 +42,7 @@ static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
  res->coeffs = coeffs;
 }

-static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
+static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
  uint8_t levels[16], ctxs[16];
  uint16_t abs_levels[16];
  int n = res->first;
@@ -108,8 +108,8 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
 extern void VP8EncDspCostInitSSE2(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
-  VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
-  VP8GetResidualCost = GetResidualCost_SSE2;
+  VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
+  VP8GetResidualCost = GetResidualCostSSE2;
 }

 #else  // !WEBP_USE_SSE2
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -11,7 +11,7 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_HAVE_NEON_RTCD)
 #include <stdio.h>
@@ -143,7 +143,7 @@ static int x86CPUInfo(CPUFeature feature) {
    return !!(cpu_info[2] & (1 << 0));
  }
  if (feature == kSlowSSSE3) {
-    if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
+    if (is_intel && (cpu_info[2] & (1 << 0))) {   // SSSE3?
      return CheckSlowModel(cpu_info[0]);
    }
    return 0;
--- a/src/dsp/dec.c
+++ b/src/dsp/dec.c
@@ -11,11 +11,9 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include <assert.h>
-
-#include "src/dsp/dsp.h"
-#include "src/dec/vp8i_dec.h"
-#include "src/utils/utils.h"
+#include "./dsp.h"
+#include "../dec/vp8i_dec.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------

@@ -27,7 +25,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 // Transforms (Paragraph 14.4)

 #define STORE(x, y, v) \
-  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
+  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))

 #define STORE2(y, dc, d, c) do {    \
  const int DC = (dc);              \
@@ -40,8 +38,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define MUL1(a) ((((a) * 20091) >> 16) + (a))
 #define MUL2(a) (((a) * 35468) >> 16)

-#if !WEBP_NEON_OMIT_C_CODE
-static void TransformOne_C(const int16_t* in, uint8_t* dst) {
+static void TransformOne(const int16_t* in, uint8_t* dst) {
  int C[4 * 4], *tmp;
  int i;
  tmp = C;
@@ -81,7 +78,7 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
 }

 // Simplified transform when only in[0], in[1] and in[4] are non-zero
-static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
  const int a = in[0] + 4;
  const int c4 = MUL2(in[4]);
  const int d4 = MUL1(in[4]);
@@ -96,21 +93,19 @@ static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
 #undef MUL2
 #undef STORE2

-static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
-  TransformOne_C(in, dst);
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
  if (do_two) {
-    TransformOne_C(in + 16, dst + 4);
+    TransformOne(in + 16, dst + 4);
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void TransformUV_C(const int16_t* in, uint8_t* dst) {
+static void TransformUV(const int16_t* in, uint8_t* dst) {
  VP8Transform(in + 0 * 16, dst, 1);
  VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
 }

-#if !WEBP_NEON_OMIT_C_CODE
-static void TransformDC_C(const int16_t* in, uint8_t* dst) {
+static void TransformDC(const int16_t* in, uint8_t* dst) {
  const int DC = in[0] + 4;
  int i, j;
  for (j = 0; j < 4; ++j) {
@@ -119,9 +114,8 @@ static void TransformDC_C(const int16_t* in, uint8_t* dst) {
    }
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
+static void TransformDCUV(const int16_t* in, uint8_t* dst) {
  if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
  if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
  if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
@@ -133,8 +127,7 @@ static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
 //------------------------------------------------------------------------------
 // Paragraph 14.3

-#if !WEBP_NEON_OMIT_C_CODE
-static void TransformWHT_C(const int16_t* in, int16_t* out) {
+static void TransformWHT(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  for (i = 0; i < 4; ++i) {
@@ -160,7 +153,6 @@ static void TransformWHT_C(const int16_t* in, int16_t* out) {
    out += 64;
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 void (*VP8TransformWHT)(const int16_t* in, int16_t* out);

@@ -169,7 +161,6 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);

 #define DST(x, y) dst[(x) + (y) * BPS]

-#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
  const uint8_t* top = dst - BPS;
  const uint8_t* const clip0 = VP8kclip1 - top[-1];
@@ -183,21 +174,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
    dst += BPS;
  }
 }
-static void TM4_C(uint8_t* dst)   { TrueMotion(dst, 4); }
-static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
-static void TM16_C(uint8_t* dst)  { TrueMotion(dst, 16); }
+static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }

 //------------------------------------------------------------------------------
 // 16x16

-static void VE16_C(uint8_t* dst) {     // vertical
+static void VE16(uint8_t* dst) {     // vertical
  int j;
  for (j = 0; j < 16; ++j) {
    memcpy(dst + j * BPS, dst - BPS, 16);
  }
 }

-static void HE16_C(uint8_t* dst) {     // horizontal
+static void HE16(uint8_t* dst) {     // horizontal
  int j;
  for (j = 16; j > 0; --j) {
    memset(dst, dst[-1], 16);
@@ -212,7 +203,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
  }
 }

-static void DC16_C(uint8_t* dst) {    // DC
+static void DC16(uint8_t* dst) {    // DC
  int DC = 16;
  int j;
  for (j = 0; j < 16; ++j) {
@@ -221,7 +212,7 @@ static void DC16_C(uint8_t* dst) {    // DC
  Put16(DC >> 5, dst);
 }

-static void DC16NoTop_C(uint8_t* dst) {   // DC with top samples not available
+static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
  int DC = 8;
  int j;
  for (j = 0; j < 16; ++j) {
@@ -230,7 +221,7 @@ static void DC16NoTop_C(uint8_t* dst) {   // DC with top samples not available
  Put16(DC >> 4, dst);
 }

-static void DC16NoLeft_C(uint8_t* dst) {  // DC with left samples not available
+static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
  int DC = 8;
  int i;
  for (i = 0; i < 16; ++i) {
@@ -239,10 +230,9 @@ static void DC16NoLeft_C(uint8_t* dst) {  // DC with left samples not available
  Put16(DC >> 4, dst);
 }

-static void DC16NoTopLeft_C(uint8_t* dst) {  // DC with no top and left samples
+static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
  Put16(0x80, dst);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];

@@ -252,8 +242,7 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)

-#if !WEBP_NEON_OMIT_C_CODE
-static void VE4_C(uint8_t* dst) {    // vertical
+static void VE4(uint8_t* dst) {    // vertical
  const uint8_t* top = dst - BPS;
  const uint8_t vals[4] = {
    AVG3(top[-1], top[0], top[1]),
@@ -266,9 +255,8 @@ static void VE4_C(uint8_t* dst) {    // vertical
    memcpy(dst + i * BPS, vals, sizeof(vals));
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void HE4_C(uint8_t* dst) {    // horizontal
+static void HE4(uint8_t* dst) {    // horizontal
  const int A = dst[-1 - BPS];
  const int B = dst[-1];
  const int C = dst[-1 + BPS];
@@ -280,8 +268,7 @@ static void HE4_C(uint8_t* dst) {    // horizontal
  WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
 }

-#if !WEBP_NEON_OMIT_C_CODE
-static void DC4_C(uint8_t* dst) {   // DC
+static void DC4(uint8_t* dst) {   // DC
  uint32_t dc = 4;
  int i;
  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -289,7 +276,7 @@ static void DC4_C(uint8_t* dst) {   // DC
  for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
 }

-static void RD4_C(uint8_t* dst) {   // Down-right
+static void RD4(uint8_t* dst) {   // Down-right
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -308,7 +295,7 @@ static void RD4_C(uint8_t* dst) {   // Down-right
                                      DST(3, 0) = AVG3(D, C, B);
 }

-static void LD4_C(uint8_t* dst) {   // Down-Left
+static void LD4(uint8_t* dst) {   // Down-Left
  const int A = dst[0 - BPS];
  const int B = dst[1 - BPS];
  const int C = dst[2 - BPS];
@@ -325,9 +312,8 @@ static void LD4_C(uint8_t* dst) {   // Down-Left
                          DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
                                      DST(3, 3) = AVG3(G, H, H);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void VR4_C(uint8_t* dst) {   // Vertical-Right
+static void VR4(uint8_t* dst) {   // Vertical-Right
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -349,7 +335,7 @@ static void VR4_C(uint8_t* dst) {   // Vertical-Right
  DST(3, 1) =             AVG3(B, C, D);
 }

-static void VL4_C(uint8_t* dst) {   // Vertical-Left
+static void VL4(uint8_t* dst) {   // Vertical-Left
  const int A = dst[0 - BPS];
  const int B = dst[1 - BPS];
  const int C = dst[2 - BPS];
@@ -371,7 +357,7 @@ static void VL4_C(uint8_t* dst) {   // Vertical-Left
              DST(3, 3) = AVG3(F, G, H);
 }

-static void HU4_C(uint8_t* dst) {   // Horizontal-Up
+static void HU4(uint8_t* dst) {   // Horizontal-Up
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -386,7 +372,7 @@ static void HU4_C(uint8_t* dst) {   // Horizontal-Up
    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }

-static void HD4_C(uint8_t* dst) {  // Horizontal-Down
+static void HD4(uint8_t* dst) {  // Horizontal-Down
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
  const int K = dst[-1 + 2 * BPS];
@@ -418,15 +404,14 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
 //------------------------------------------------------------------------------
 // Chroma

-#if !WEBP_NEON_OMIT_C_CODE
-static void VE8uv_C(uint8_t* dst) {    // vertical
+static void VE8uv(uint8_t* dst) {    // vertical
  int j;
  for (j = 0; j < 8; ++j) {
    memcpy(dst + j * BPS, dst - BPS, 8);
  }
 }

-static void HE8uv_C(uint8_t* dst) {    // horizontal
+static void HE8uv(uint8_t* dst) {    // horizontal
  int j;
  for (j = 0; j < 8; ++j) {
    memset(dst, dst[-1], 8);
@@ -442,7 +427,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
  }
 }

-static void DC8uv_C(uint8_t* dst) {     // DC
+static void DC8uv(uint8_t* dst) {     // DC
  int dc0 = 8;
  int i;
  for (i = 0; i < 8; ++i) {
@@ -451,7 +436,7 @@ static void DC8uv_C(uint8_t* dst) {     // DC
  Put8x8uv(dc0 >> 4, dst);
 }

-static void DC8uvNoLeft_C(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
  int dc0 = 4;
  int i;
  for (i = 0; i < 8; ++i) {
@@ -460,7 +445,7 @@ static void DC8uvNoLeft_C(uint8_t* dst) {   // DC with no left samples
  Put8x8uv(dc0 >> 3, dst);
 }

-static void DC8uvNoTop_C(uint8_t* dst) {  // DC with no top samples
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  int dc0 = 4;
  int i;
  for (i = 0; i < 8; ++i) {
@@ -469,19 +454,17 @@ static void DC8uvNoTop_C(uint8_t* dst) {  // DC with no top samples
  Put8x8uv(dc0 >> 3, dst);
 }

-static void DC8uvNoTopLeft_C(uint8_t* dst) {    // DC with nothing
+static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
  Put8x8uv(0x80, dst);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];

 //------------------------------------------------------------------------------
 // Edge filtering functions

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 // 4 pixels in, 2 pixels out
-static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
@@ -491,7 +474,7 @@ static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
 }

 // 4 pixels in, 4 pixels out
-static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
  const int a = 3 * (q0 - p0);
  const int a1 = VP8ksclip2[(a + 4) >> 3];
@@ -504,7 +487,7 @@ static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
 }

 // 6 pixels in, 6 pixels out
-static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
@@ -520,22 +503,18 @@ static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
  p[ 2*step] = VP8kclip1[q2 - a3];
 }

-static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

-#if !WEBP_NEON_OMIT_C_CODE
-static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
-                                      int step, int t, int it) {
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+                                     int step, int t, int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
  const int p0 = p[-step], q0 = p[0];
  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -544,159 +523,140 @@ static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
         VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
         VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)

-#if !WEBP_NEON_OMIT_C_CODE
-static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
-    if (NeedsFilter_C(p + i, stride, thresh2)) {
-      DoFilter2_C(p + i, stride);
+    if (needs_filter(p + i, stride, thresh2)) {
+      do_filter2(p + i, stride);
    }
  }
 }

-static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
-    if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
-      DoFilter2_C(p + i * stride, 1);
+    if (needs_filter(p + i * stride, 1, thresh2)) {
+      do_filter2(p + i * stride, 1);
    }
  }
 }

-static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
-    SimpleVFilter16_C(p, stride, thresh);
+    SimpleVFilter16(p, stride, thresh);
  }
 }

-static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
-    SimpleHFilter16_C(p, stride, thresh);
+    SimpleHFilter16(p, stride, thresh);
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
-                                       int hstride, int vstride, int size,
-                                       int thresh, int ithresh,
-                                       int hev_thresh) {
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
-    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
-      if (Hev(p, hstride, hev_thresh)) {
-        DoFilter2_C(p, hstride);
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
      } else {
-        DoFilter6_C(p, hstride);
+        do_filter6(p, hstride);
      }
    }
    p += vstride;
  }
 }

-static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
-                                       int hstride, int vstride, int size,
-                                       int thresh, int ithresh,
-                                       int hev_thresh) {
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
-    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
-      if (Hev(p, hstride, hev_thresh)) {
-        DoFilter2_C(p, hstride);
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
      } else {
-        DoFilter4_C(p, hstride);
+        do_filter4(p, hstride);
      }
    }
    p += vstride;
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

-#if !WEBP_NEON_OMIT_C_CODE
 // on macroblock edges
-static void VFilter16_C(uint8_t* p, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }

-static void HFilter16_C(uint8_t* p, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }

 // on three inner edges
-static void VFilter16i_C(uint8_t* p, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
-    FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static void HFilter16i_C(uint8_t* p, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
-    FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

-#if !WEBP_NEON_OMIT_C_CODE
 // 8-pixels wide variant, for chroma filtering
-static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
-  FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
-  FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

-#if !WEBP_NEON_OMIT_C_CODE
-static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
-                        int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

 //------------------------------------------------------------------------------

-static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
-                               int dst_stride) {
+static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
+                             int dst_stride) {
  int i, j;
  for (j = 0; j < 8; ++j) {
    for (i = 0; i < 8; ++i) {
@@ -749,66 +709,54 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {

  VP8InitClipTables();

-#if !WEBP_NEON_OMIT_C_CODE
-  VP8TransformWHT = TransformWHT_C;
-  VP8Transform = TransformTwo_C;
-  VP8TransformDC = TransformDC_C;
-  VP8TransformAC3 = TransformAC3_C;
-#endif
-  VP8TransformUV = TransformUV_C;
-  VP8TransformDCUV = TransformDCUV_C;
+  VP8TransformWHT = TransformWHT;
+  VP8Transform = TransformTwo;
+  VP8TransformUV = TransformUV;
+  VP8TransformDC = TransformDC;
+  VP8TransformDCUV = TransformDCUV;
+  VP8TransformAC3 = TransformAC3;

-#if !WEBP_NEON_OMIT_C_CODE
-  VP8VFilter16 = VFilter16_C;
-  VP8VFilter16i = VFilter16i_C;
-  VP8HFilter16 = HFilter16_C;
-  VP8VFilter8 = VFilter8_C;
-  VP8VFilter8i = VFilter8i_C;
-  VP8SimpleVFilter16 = SimpleVFilter16_C;
-  VP8SimpleHFilter16 = SimpleHFilter16_C;
-  VP8SimpleVFilter16i = SimpleVFilter16i_C;
-  VP8SimpleHFilter16i = SimpleHFilter16i_C;
-#endif
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-  VP8HFilter16i = HFilter16i_C;
-  VP8HFilter8 = HFilter8_C;
-  VP8HFilter8i = HFilter8i_C;
-#endif
+  VP8PredLuma4[0] = DC4;
+  VP8PredLuma4[1] = TM4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[3] = HE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[5] = VR4;
+  VP8PredLuma4[6] = LD4;
+  VP8PredLuma4[7] = VL4;
+  VP8PredLuma4[8] = HD4;
+  VP8PredLuma4[9] = HU4;

-#if !WEBP_NEON_OMIT_C_CODE
-  VP8PredLuma4[0] = DC4_C;
-  VP8PredLuma4[1] = TM4_C;
-  VP8PredLuma4[2] = VE4_C;
-  VP8PredLuma4[4] = RD4_C;
-  VP8PredLuma4[6] = LD4_C;
-#endif
+  VP8PredLuma16[0] = DC16;
+  VP8PredLuma16[1] = TM16;
+  VP8PredLuma16[2] = VE16;
+  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[4] = DC16NoTop;
+  VP8PredLuma16[5] = DC16NoLeft;
+  VP8PredLuma16[6] = DC16NoTopLeft;

-  VP8PredLuma4[3] = HE4_C;
-  VP8PredLuma4[5] = VR4_C;
-  VP8PredLuma4[7] = VL4_C;
-  VP8PredLuma4[8] = HD4_C;
-  VP8PredLuma4[9] = HU4_C;
+  VP8PredChroma8[0] = DC8uv;
+  VP8PredChroma8[1] = TM8uv;
+  VP8PredChroma8[2] = VE8uv;
+  VP8PredChroma8[3] = HE8uv;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+  VP8PredChroma8[6] = DC8uvNoTopLeft;

-#if !WEBP_NEON_OMIT_C_CODE
-  VP8PredLuma16[0] = DC16_C;
-  VP8PredLuma16[1] = TM16_C;
-  VP8PredLuma16[2] = VE16_C;
-  VP8PredLuma16[3] = HE16_C;
-  VP8PredLuma16[4] = DC16NoTop_C;
-  VP8PredLuma16[5] = DC16NoLeft_C;
-  VP8PredLuma16[6] = DC16NoTopLeft_C;
-
-  VP8PredChroma8[0] = DC8uv_C;
-  VP8PredChroma8[1] = TM8uv_C;
-  VP8PredChroma8[2] = VE8uv_C;
-  VP8PredChroma8[3] = HE8uv_C;
-  VP8PredChroma8[4] = DC8uvNoTop_C;
-  VP8PredChroma8[5] = DC8uvNoLeft_C;
-  VP8PredChroma8[6] = DC8uvNoTopLeft_C;
-#endif
-
-  VP8DitherCombine8x8 = DitherCombine8x8_C;
+  VP8DitherCombine8x8 = DitherCombine8x8;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  if (VP8GetCPUInfo != NULL) {
@@ -822,6 +770,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
 #endif
    }
 #endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8DspInitNEON();
+    }
+#endif
 #if defined(WEBP_USE_MIPS32)
    if (VP8GetCPUInfo(kMIPS32)) {
      VP8DspInitMIPS32();
@@ -838,57 +791,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
    }
 #endif
  }
-
-#if defined(WEBP_USE_NEON)
-  if (WEBP_NEON_OMIT_C_CODE ||
-      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-    VP8DspInitNEON();
-  }
-#endif
-
-  assert(VP8TransformWHT != NULL);
-  assert(VP8Transform != NULL);
-  assert(VP8TransformDC != NULL);
-  assert(VP8TransformAC3 != NULL);
-  assert(VP8TransformUV != NULL);
-  assert(VP8TransformDCUV != NULL);
-  assert(VP8VFilter16 != NULL);
-  assert(VP8HFilter16 != NULL);
-  assert(VP8VFilter8 != NULL);
-  assert(VP8HFilter8 != NULL);
-  assert(VP8VFilter16i != NULL);
-  assert(VP8HFilter16i != NULL);
-  assert(VP8VFilter8i != NULL);
-  assert(VP8HFilter8i != NULL);
-  assert(VP8SimpleVFilter16 != NULL);
-  assert(VP8SimpleHFilter16 != NULL);
-  assert(VP8SimpleVFilter16i != NULL);
-  assert(VP8SimpleHFilter16i != NULL);
-  assert(VP8PredLuma4[0] != NULL);
-  assert(VP8PredLuma4[1] != NULL);
-  assert(VP8PredLuma4[2] != NULL);
-  assert(VP8PredLuma4[3] != NULL);
-  assert(VP8PredLuma4[4] != NULL);
-  assert(VP8PredLuma4[5] != NULL);
-  assert(VP8PredLuma4[6] != NULL);
-  assert(VP8PredLuma4[7] != NULL);
-  assert(VP8PredLuma4[8] != NULL);
-  assert(VP8PredLuma4[9] != NULL);
-  assert(VP8PredLuma16[0] != NULL);
-  assert(VP8PredLuma16[1] != NULL);
-  assert(VP8PredLuma16[2] != NULL);
-  assert(VP8PredLuma16[3] != NULL);
-  assert(VP8PredLuma16[4] != NULL);
-  assert(VP8PredLuma16[5] != NULL);
-  assert(VP8PredLuma16[6] != NULL);
-  assert(VP8PredChroma8[0] != NULL);
-  assert(VP8PredChroma8[1] != NULL);
-  assert(VP8PredChroma8[2] != NULL);
-  assert(VP8PredChroma8[3] != NULL);
-  assert(VP8PredChroma8[4] != NULL);
-  assert(VP8PredChroma8[5] != NULL);
-  assert(VP8PredChroma8[6] != NULL);
-  assert(VP8DitherCombine8x8 != NULL);
-
  dec_last_cpuinfo_used = VP8GetCPUInfo;
 }
--- a/src/dsp/dec_clip_tables.c
+++ b/src/dsp/dec_clip_tables.c
@@ -11,14 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

-// define to 0 to have run-time table initialization
-#if !defined(USE_STATIC_TABLES)
-#define USE_STATIC_TABLES 1   // ALTERNATE_CODE
-#endif
+#define USE_STATIC_TABLES     // undefine to have run-time table initialization

-#if (USE_STATIC_TABLES == 1)
+#ifdef USE_STATIC_TABLES

 static const uint8_t abs0[255 + 255 + 1] = {
  0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
@@ -340,7 +337,7 @@ static uint8_t clip1[255 + 511 + 1];
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;

-#endif    // USE_STATIC_TABLES
+#endif

 const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
 const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -348,7 +345,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
 const uint8_t* const VP8kabs0 = &abs0[255];

 WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
-#if (USE_STATIC_TABLES == 0)
+#if !defined(USE_STATIC_TABLES)
  int i;
  if (!tables_ok) {
    for (i = -255; i <= 255; ++i) {
--- a/src/dsp/dec_mips32.c
+++ b/src/dsp/dec_mips32.c
@@ -12,11 +12,11 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS32)

-#include "src/dsp/mips_macro.h"
+#include "./mips_macro.h"

 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
--- a/src/dsp/dec_mips_dsp_r2.c
+++ b/src/dsp/dec_mips_dsp_r2.c
@@ -12,11 +12,11 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS_DSP_R2)

-#include "src/dsp/mips_macro.h"
+#include "./mips_macro.h"

 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
--- a/src/dsp/dec_msa.c
+++ b/src/dsp/dec_msa.c
@@ -12,11 +12,11 @@
 // Author(s):  Prashant Patil   (prashant.patil@imgtec.com)


-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MSA)

-#include "src/dsp/msa_macro.h"
+#include "./msa_macro.h"

 //------------------------------------------------------------------------------
 // Transforms
@@ -222,7 +222,6 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
  const v16i8 cnst4b = __msa_ldi_b(4);                        \
  const v16i8 cnst3b = __msa_ldi_b(3);                        \
  const v8i16 cnst9h = __msa_ldi_h(9);                        \
-  const v8i16 cnst63h = __msa_ldi_h(63);                      \
                                                              \
  FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m);         \
  filt = __msa_subs_s_b(p1_m, q1_m);                          \
@@ -242,9 +241,9 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
  ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l);               \
  /* update q2/p2 */                                          \
  temp0 = filt_r * cnst9h;                                    \
-  temp1 = temp0 + cnst63h;                                    \
+  temp1 = ADDVI_H(temp0, 63);                                 \
  temp2 = filt_l * cnst9h;                                    \
-  temp3 = temp2 + cnst63h;                                    \
+  temp3 = ADDVI_H(temp2, 63);                                 \
  FILT2(q2_m, p2_m, q2, p2);                                  \
  /* update q1/p1 */                                          \
  temp1 = temp1 + temp0;                                      \
@@ -709,7 +708,7 @@ static void VE4(uint8_t* dst) {    // vertical
  const uint32_t val0 = LW(ptop + 0);
  const uint32_t val1 = LW(ptop + 4);
  uint32_t out;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = { 0 }, B, C, AC, B2, R;  // zeroed: INSERT_W2_UB updates A in place

  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
@@ -726,7 +725,7 @@ static void RD4(uint8_t* dst) {   // Down-right
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A, B, C, AC, B2, R, A1 = { 0 };
+  v16u8 A, B, C, AC, B2, R, A1 = { 0 };  // A1 is updated in place below

  INSERT_W2_UB(val0, val1, A1);
  A = SLDI_UB(A1, A1, 12);
@@ -754,7 +753,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
  uint32_t val0 = LW(ptop + 0);
  uint32_t val1 = LW(ptop + 4);
  uint32_t val2, val3;
-  v16u8 A = { 0 }, B, C, AC, B2, R;
+  v16u8 A = { 0 }, B, C, AC, B2, R;  // zeroed: INSERT_W2_UB updates A in place

  INSERT_W2_UB(val0, val1, A);
  B = SLDI_UB(A, A, 1);
--- a/src/dsp/dec_neon.c
+++ b/src/dsp/dec_neon.c
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -12,25 +12,23 @@
 // Author: somnath@google.com (Somnath Banerjee)
 //         cduvivier@google.com (Christian Duvivier)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_SSE2)

 // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
 // one it seems => disable it by default. Uncomment the following to enable:
-#if !defined(USE_TRANSFORM_AC3)
-#define USE_TRANSFORM_AC3 0   // ALTERNATE_CODE
-#endif
+// #define USE_TRANSFORM_AC3

 #include <emmintrin.h>
-#include "src/dsp/common_sse2.h"
-#include "src/dec/vp8i_dec.h"
-#include "src/utils/utils.h"
+#include "./common_sse2.h"
+#include "../dec/vp8i_dec.h"
+#include "../utils/utils.h"

 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)

-static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
+static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
  // This implementation makes use of 16-bit fixed point versions of two
  // multiply constants:
  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@@ -195,7 +193,7 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
  }
 }

-#if (USE_TRANSFORM_AC3 == 1)
+#if defined(USE_TRANSFORM_AC3)
 #define MUL(a, b) (((a) * (b)) >> 16)
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
  static const int kC1 = 20091 + (1 << 16);
@@ -250,7 +248,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
    _mm_subs_epu8((p), (q)))

 // Shift each byte of "x" by 3 bits while preserving by the sign bit.
-static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
+static WEBP_INLINE void SignedShift8b(__m128i* const x) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
  const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
@@ -260,8 +258,8 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
 }

 #define FLIP_SIGN_BIT2(a, b) {                                                 \
-  (a) = _mm_xor_si128(a, sign_bit);                                            \
-  (b) = _mm_xor_si128(b, sign_bit);                                            \
+  a = _mm_xor_si128(a, sign_bit);                                              \
+  b = _mm_xor_si128(b, sign_bit);                                              \
 }

 #define FLIP_SIGN_BIT4(a, b, c, d) {                                           \
@@ -270,11 +268,11 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
 }

 // input/output is uint8_t
-static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
-                                       const __m128i* const p0,
-                                       const __m128i* const q0,
-                                       const __m128i* const q1,
-                                       int hev_thresh, __m128i* const not_hev) {
+static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  int hev_thresh, __m128i* const not_hev) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i t_1 = MM_ABS(*p1, *p0);
  const __m128i t_2 = MM_ABS(*q1, *q0);
@@ -287,11 +285,11 @@ static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
 }

 // input pixels are int8_t
-static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
-                                          const __m128i* const p0,
-                                          const __m128i* const q0,
-                                          const __m128i* const q1,
-                                          __m128i* const delta) {
+static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
+                                     const __m128i* const p0,
+                                     const __m128i* const q0,
+                                     const __m128i* const q1,
+                                     __m128i* const delta) {
  // beware of addition order, for saturation!
  const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1);   // p1 - q1
  const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0);   // q0 - p0
@@ -302,16 +300,15 @@ static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
 }

 // input and output are int8_t
-static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
-                                            __m128i* const q0,
-                                            const __m128i* const fl) {
+static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
+                                       const __m128i* const fl) {
  const __m128i k3 = _mm_set1_epi8(3);
  const __m128i k4 = _mm_set1_epi8(4);
  __m128i v3 = _mm_adds_epi8(*fl, k3);
  __m128i v4 = _mm_adds_epi8(*fl, k4);

-  SignedShift8b_SSE2(&v4);             // v4 >> 3
-  SignedShift8b_SSE2(&v3);             // v3 >> 3
+  SignedShift8b(&v4);                  // v4 >> 3
+  SignedShift8b(&v3);                  // v3 >> 3
  *q0 = _mm_subs_epi8(*q0, v4);        // q0 -= v4
  *p0 = _mm_adds_epi8(*p0, v3);        // p0 += v3
 }
@@ -320,9 +317,9 @@ static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
 // Update operations:
 // q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
 // Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
-static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
-                                           const __m128i* const a0_lo,
-                                           const __m128i* const a0_hi) {
+static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
+                                      const __m128i* const a0_lo,
+                                      const __m128i* const a0_hi) {
  const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
  const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
  const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
@@ -333,11 +330,11 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
 }

 // input pixels are uint8_t
-static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
-                                         const __m128i* const p0,
-                                         const __m128i* const q0,
-                                         const __m128i* const q1,
-                                         int thresh, __m128i* const mask) {
+static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, __m128i* const mask) {
  const __m128i m_thresh = _mm_set1_epi8(thresh);
  const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
  const __m128i kFE = _mm_set1_epi8(0xFE);
@@ -356,29 +353,28 @@ static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
 // Edge filtering functions

 // Applies filter on 2 pixels (p0 and q0)
-static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
-                                       __m128i* const q0, __m128i* const q1,
-                                       int thresh) {
+static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  int thresh) {
  __m128i a, mask;
  const __m128i sign_bit = _mm_set1_epi8(0x80);
-  // convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
+  // convert p1/q1 to int8_t (for GetBaseDelta)
  const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
  const __m128i q1s = _mm_xor_si128(*q1, sign_bit);

-  NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
+  NeedsFilter(p1, p0, q0, q1, thresh, &mask);

  FLIP_SIGN_BIT2(*p0, *q0);
-  GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
+  GetBaseDelta(&p1s, p0, q0, &q1s, &a);
  a = _mm_and_si128(a, mask);     // mask filter values we don't care about
-  DoSimpleFilter_SSE2(p0, q0, &a);
+  DoSimpleFilter(p0, q0, &a);
  FLIP_SIGN_BIT2(*p0, *q0);
 }

 // Applies filter on 4 pixels (p1, p0, q0 and q1)
-static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
-                                       __m128i* const q0, __m128i* const q1,
-                                       const __m128i* const mask,
-                                       int hev_thresh) {
+static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  const __m128i* const mask, int hev_thresh) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i sign_bit = _mm_set1_epi8(0x80);
  const __m128i k64 = _mm_set1_epi8(64);
@@ -388,7 +384,7 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
  __m128i t1, t2, t3;

  // compute hev mask
-  GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);

  // convert to signed values
  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
@@ -403,8 +399,8 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,

  t2 = _mm_adds_epi8(t1, k3);        // 3 * (q0 - p0) + hev(p1 - q1) + 3
  t3 = _mm_adds_epi8(t1, k4);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
-  SignedShift8b_SSE2(&t2);           // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
-  SignedShift8b_SSE2(&t3);           // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+  SignedShift8b(&t2);                // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  SignedShift8b(&t3);                // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
  *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
  *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
  FLIP_SIGN_BIT2(*p0, *q0);
@@ -421,26 +417,25 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
 }

 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
-static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
-                                       __m128i* const p0, __m128i* const q0,
-                                       __m128i* const q1, __m128i* const q2,
-                                       const __m128i* const mask,
-                                       int hev_thresh) {
+static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
+                                  __m128i* const p0, __m128i* const q0,
+                                  __m128i* const q1, __m128i* const q2,
+                                  const __m128i* const mask, int hev_thresh) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i sign_bit = _mm_set1_epi8(0x80);
  __m128i a, not_hev;

  // compute hev mask
-  GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);

  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
  FLIP_SIGN_BIT2(*p2, *q2);
-  GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
+  GetBaseDelta(p1, p0, q0, q1, &a);

  { // do simple filter on pixels with hev
    const __m128i m = _mm_andnot_si128(not_hev, *mask);
    const __m128i f = _mm_and_si128(a, m);
-    DoSimpleFilter_SSE2(p0, q0, &f);
+    DoSimpleFilter(p0, q0, &f);
  }

  { // do strong filter on pixels with not hev
@@ -465,15 +460,15 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
    const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo);  // Filter * 27 + 63
    const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi);  // Filter * 27 + 63

-    Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
-    Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
-    Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
+    Update2Pixels(p2, q2, &a2_lo, &a2_hi);
+    Update2Pixels(p1, q1, &a1_lo, &a1_hi);
+    Update2Pixels(p0, q0, &a0_lo, &a0_hi);
  }
 }

 // reads 8 rows across a vertical edge.
-static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
-                                     __m128i* const p, __m128i* const q) {
+static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
+                                __m128i* const p, __m128i* const q) {
  // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
  // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
  const __m128i A0 = _mm_set_epi32(
@@ -499,11 +494,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
  *q = _mm_unpackhi_epi32(C0, C1);
 }

-static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
-                                      const uint8_t* const r8,
-                                      int stride,
-                                      __m128i* const p1, __m128i* const p0,
-                                      __m128i* const q0, __m128i* const q1) {
+static WEBP_INLINE void Load16x4(const uint8_t* const r0,
+                                 const uint8_t* const r8,
+                                 int stride,
+                                 __m128i* const p1, __m128i* const p0,
+                                 __m128i* const q0, __m128i* const q1) {
  // Assume the pixels around the edge (|) are numbered as follows
  //                00 01 | 02 03
  //                10 11 | 12 13
@@ -519,8 +514,8 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
  // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
  // p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
  // q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-  Load8x4_SSE2(r0, stride, p1, q0);
-  Load8x4_SSE2(r8, stride, p0, q1);
+  Load8x4(r0, stride, p1, q0);
+  Load8x4(r8, stride, p0, q1);

  {
    // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
@@ -536,8 +531,7 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
  }
 }

-static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
-                                      uint8_t* dst, int stride) {
+static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
  int i;
  for (i = 0; i < 4; ++i, dst += stride) {
    WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
@@ -546,12 +540,12 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
 }

 // Transpose back and store
-static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
-                                       const __m128i* const p0,
-                                       const __m128i* const q0,
-                                       const __m128i* const q1,
-                                       uint8_t* r0, uint8_t* r8,
-                                       int stride) {
+static WEBP_INLINE void Store16x4(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  uint8_t* r0, uint8_t* r8,
+                                  int stride) {
  __m128i t1, p1_s, p0_s, q0_s, q1_s;

  // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -578,55 +572,55 @@ static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
  p1_s = _mm_unpacklo_epi16(t1, q1_s);
  q1_s = _mm_unpackhi_epi16(t1, q1_s);

-  Store4x4_SSE2(&p0_s, r0, stride);
+  Store4x4(&p0_s, r0, stride);
  r0 += 4 * stride;
-  Store4x4_SSE2(&q0_s, r0, stride);
+  Store4x4(&q0_s, r0, stride);

-  Store4x4_SSE2(&p1_s, r8, stride);
+  Store4x4(&p1_s, r8, stride);
  r8 += 4 * stride;
-  Store4x4_SSE2(&q1_s, r8, stride);
+  Store4x4(&q1_s, r8, stride);
 }

 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)

-static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  // Load
  __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
  __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
  __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
  __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);

-  DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);

  // Store
  _mm_storeu_si128((__m128i*)&p[-stride], p0);
  _mm_storeu_si128((__m128i*)&p[0], q0);
 }

-static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  __m128i p1, p0, q0, q1;

  p -= 2;  // beginning of p1

-  Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
-  DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
-  Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
+  Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+  Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
 }

-static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
-    SimpleVFilter16_SSE2(p, stride, thresh);
+    SimpleVFilter16(p, stride, thresh);
  }
 }

-static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
-    SimpleHFilter16_SSE2(p, stride, thresh);
+    SimpleHFilter16(p, stride, thresh);
  }
 }

@@ -634,60 +628,60 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
 // Complex In-loop filtering (Paragraph 15.3)

 #define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
-  (m) = MM_ABS(p1, p0);                                                        \
-  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
+  m = MM_ABS(p1, p0);                                                          \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
 } while (0)

 #define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
-  (m) = _mm_max_epu8(m, MM_ABS(p1, p0));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
-  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
+  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
 } while (0)

 #define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
-  (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]);                        \
-  (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]);                        \
-  (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]);                        \
-  (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]);                        \
+  e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]);                            \
+  e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]);                            \
+  e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]);                            \
+  e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]);                            \
 }

 #define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
  const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
  const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]);                 \
-  (p) = _mm_unpacklo_epi64(U, V);                                              \
+  p = _mm_unpacklo_epi64(U, V);                                                \
 } while (0)

 #define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
-  LOADUV_H_EDGE(e1, u, v, 0 * (stride));                                       \
-  LOADUV_H_EDGE(e2, u, v, 1 * (stride));                                       \
-  LOADUV_H_EDGE(e3, u, v, 2 * (stride));                                       \
-  LOADUV_H_EDGE(e4, u, v, 3 * (stride));                                       \
+  LOADUV_H_EDGE(e1, u, v, 0 * stride);                                         \
+  LOADUV_H_EDGE(e2, u, v, 1 * stride);                                         \
+  LOADUV_H_EDGE(e3, u, v, 2 * stride);                                         \
+  LOADUV_H_EDGE(e4, u, v, 3 * stride);                                         \
 }

 #define STOREUV(p, u, v, stride) {                                             \
-  _mm_storel_epi64((__m128i*)&(u)[(stride)], p);                               \
-  (p) = _mm_srli_si128(p, 8);                                                  \
-  _mm_storel_epi64((__m128i*)&(v)[(stride)], p);                               \
+  _mm_storel_epi64((__m128i*)&u[(stride)], p);                                 \
+  p = _mm_srli_si128(p, 8);                                                    \
+  _mm_storel_epi64((__m128i*)&v[(stride)], p);                                 \
 }

-static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
-                                         const __m128i* const p0,
-                                         const __m128i* const q0,
-                                         const __m128i* const q1,
-                                         int thresh, int ithresh,
-                                         __m128i* const mask) {
+static WEBP_INLINE void ComplexMask(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, int ithresh,
+                                    __m128i* const mask) {
  const __m128i it = _mm_set1_epi8(ithresh);
  const __m128i diff = _mm_subs_epu8(*mask, it);
  const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
  __m128i filter_mask;
-  NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
+  NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
  *mask = _mm_and_si128(thresh_mask, filter_mask);
 }

 // on macroblock edges
-static void VFilter16_SSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
  __m128i t1;
  __m128i mask;
  __m128i p2, p1, p0, q0, q1, q2;
@@ -700,8 +694,8 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
  LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
  MAX_DIFF2(t1, q2, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);

  // Store
  _mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
@@ -712,28 +706,28 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
  _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
 }

-static void HFilter16_SSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i p3, p2, p1, p0, q0, q1, q2, q3;

  uint8_t* const b = p - 4;
-  Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
+  Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
  MAX_DIFF1(p3, p2, p1, p0, mask);

-  Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
+  Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
  MAX_DIFF2(q3, q2, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);

-  Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
-  Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
+  Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
+  Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
 }

 // on three inner edges
-static void VFilter16i_SSE2(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
  int k;
  __m128i p3, p2, p1, p0;   // loop invariants

@@ -750,8 +744,8 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,

    // p3 and p2 are not just temporary variables here: they will be
    // re-used for next span. And q2/q3 will become p1/p0 accordingly.
-    ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
-    DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);

    // Store
    _mm_storeu_si128((__m128i*)&b[0 * stride], p1);
@@ -765,12 +759,12 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
  }
 }

-static void HFilter16i_SSE2(uint8_t* p, int stride,
-                            int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
  int k;
  __m128i p3, p2, p1, p0;   // loop invariants

-  Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue
+  Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue

  for (k = 3; k > 0; --k) {
    __m128i mask, tmp1, tmp2;
@@ -779,13 +773,13 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
    p += 4;  // beginning of q0 (and next span)

    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
-    Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
+    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);

-    ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
-    DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);

-    Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
+    Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);

    // rotate samples
    p1 = tmp1;
@@ -794,8 +788,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
 }

 // 8-pixels wide variant, for chroma filtering
-static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, p2, p1, p0, q0, q1, q2;

@@ -807,8 +801,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
  LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
  MAX_DIFF2(t1, q2, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);

  // Store
  STOREUV(p2, u, v, -3 * stride);
@@ -819,28 +813,28 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
  STOREUV(q2, u, v, 2 * stride);
 }

-static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i p3, p2, p1, p0, q0, q1, q2, q3;

  uint8_t* const tu = u - 4;
  uint8_t* const tv = v - 4;
-  Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
+  Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
  MAX_DIFF1(p3, p2, p1, p0, mask);

-  Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
+  Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
  MAX_DIFF2(q3, q2, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);

-  Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
-  Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
+  Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
+  Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
 }

-static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;

@@ -855,8 +849,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
  LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
  MAX_DIFF2(t2, t1, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);

  // Store
  STOREUV(p1, u, v, -2 * stride);
@@ -865,24 +859,24 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
  STOREUV(q1, u, v, 1 * stride);
 }

-static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;
-  Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
+  Load16x4(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
  MAX_DIFF1(t2, t1, p1, p0, mask);

  u += 4;  // beginning of q0
  v += 4;
-  Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
+  Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
  MAX_DIFF2(t2, t1, q1, q0, mask);

-  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);

  u -= 2;  // beginning of p1
  v -= 2;
-  Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
+  Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
 }

 //------------------------------------------------------------------------------
@@ -899,7 +893,7 @@ static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
 //   where: AC = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
 //   and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1

-static void VE4_SSE2(uint8_t* dst) {    // vertical
+static void VE4(uint8_t* dst) {    // vertical
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -915,7 +909,7 @@ static void VE4_SSE2(uint8_t* dst) {    // vertical
  }
 }

-static void LD4_SSE2(uint8_t* dst) {   // Down-Left
+static void LD4(uint8_t* dst) {   // Down-Left
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -931,7 +925,7 @@ static void LD4_SSE2(uint8_t* dst) {   // Down-Left
  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
 }

-static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
+static void VR4(uint8_t* dst) {   // Vertical-Right
  const __m128i one = _mm_set1_epi8(1);
  const int I = dst[-1 + 0 * BPS];
  const int J = dst[-1 + 1 * BPS];
@@ -956,7 +950,7 @@ static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
  DST(0, 3) = AVG3(K, J, I);
 }

-static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
+static void VL4(uint8_t* dst) {   // Vertical-Left
  const __m128i one = _mm_set1_epi8(1);
  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
  const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@@ -981,7 +975,7 @@ static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
  DST(3, 3) = (extra_out >> 8) & 0xff;
 }

-static void RD4_SSE2(uint8_t* dst) {   // Down-right
+static void RD4(uint8_t* dst) {   // Down-right
  const __m128i one = _mm_set1_epi8(1);
  const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
  const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
@@ -1010,7 +1004,7 @@ static void RD4_SSE2(uint8_t* dst) {   // Down-right
 //------------------------------------------------------------------------------
 // Luma 16x16

-static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
  const uint8_t* top = dst - BPS;
  const __m128i zero = _mm_setzero_si128();
  int y;
@@ -1047,11 +1041,11 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
  }
 }

-static void TM4_SSE2(uint8_t* dst)   { TrueMotion_SSE2(dst, 4); }
-static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
-static void TM16_SSE2(uint8_t* dst)  { TrueMotion_SSE2(dst, 16); }
+static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }

-static void VE16_SSE2(uint8_t* dst) {
+static void VE16(uint8_t* dst) {
  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
  int j;
  for (j = 0; j < 16; ++j) {
@@ -1059,7 +1053,7 @@ static void VE16_SSE2(uint8_t* dst) {
  }
 }

-static void HE16_SSE2(uint8_t* dst) {     // horizontal
+static void HE16(uint8_t* dst) {     // horizontal
  int j;
  for (j = 16; j > 0; --j) {
    const __m128i values = _mm_set1_epi8(dst[-1]);
@@ -1068,7 +1062,7 @@ static void HE16_SSE2(uint8_t* dst) {     // horizontal
  }
 }

-static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
  int j;
  const __m128i values = _mm_set1_epi8(v);
  for (j = 0; j < 16; ++j) {
@@ -1076,7 +1070,7 @@ static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
  }
 }

-static void DC16_SSE2(uint8_t* dst) {  // DC
+static void DC16(uint8_t* dst) {    // DC
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
  const __m128i sad8x2 = _mm_sad_epu8(top, zero);
@@ -1089,37 +1083,37 @@ static void DC16_SSE2(uint8_t* dst) {  // DC
  }
  {
    const int DC = _mm_cvtsi128_si32(sum) + left + 16;
-    Put16_SSE2(DC >> 5, dst);
+    Put16(DC >> 5, dst);
  }
 }

-static void DC16NoTop_SSE2(uint8_t* dst) {  // DC with top samples unavailable
+static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
  int DC = 8;
  int j;
  for (j = 0; j < 16; ++j) {
    DC += dst[-1 + j * BPS];
  }
-  Put16_SSE2(DC >> 4, dst);
+  Put16(DC >> 4, dst);
 }

-static void DC16NoLeft_SSE2(uint8_t* dst) {  // DC with left samples unavailable
+static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
  const __m128i sad8x2 = _mm_sad_epu8(top, zero);
  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
  const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
  const int DC = _mm_cvtsi128_si32(sum) + 8;
-  Put16_SSE2(DC >> 4, dst);
+  Put16(DC >> 4, dst);
 }

-static void DC16NoTopLeft_SSE2(uint8_t* dst) {  // DC with no top & left samples
-  Put16_SSE2(0x80, dst);
+static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
+  Put16(0x80, dst);
 }

 //------------------------------------------------------------------------------
 // Chroma

-static void VE8uv_SSE2(uint8_t* dst) {    // vertical
+static void VE8uv(uint8_t* dst) {    // vertical
  int j;
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  for (j = 0; j < 8; ++j) {
@@ -1127,8 +1121,17 @@ static void VE8uv_SSE2(uint8_t* dst) {    // vertical
  }
 }

+static void HE8uv(uint8_t* dst) {    // horizontal: 8 rows, 8 bytes per row
+  int j;
+  for (j = 0; j < 8; ++j) {
+    const __m128i values = _mm_set1_epi8(dst[-1]);  // broadcast left sample
+    _mm_storel_epi64((__m128i*)dst, values);  // store the low 8 bytes
+    dst += BPS;  // advance to the next row
+  }
+}
+
 // helper for chroma-DC predictions
-static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
  int j;
  const __m128i values = _mm_set1_epi8(v);
  for (j = 0; j < 8; ++j) {
@@ -1136,7 +1139,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
  }
 }

-static void DC8uv_SSE2(uint8_t* dst) {     // DC
+static void DC8uv(uint8_t* dst) {     // DC
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1147,29 +1150,29 @@ static void DC8uv_SSE2(uint8_t* dst) {     // DC
  }
  {
    const int DC = _mm_cvtsi128_si32(sum) + left + 8;
-    Put8x8uv_SSE2(DC >> 4, dst);
+    Put8x8uv(DC >> 4, dst);
  }
 }

-static void DC8uvNoLeft_SSE2(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
  const __m128i zero = _mm_setzero_si128();
  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
  const __m128i sum = _mm_sad_epu8(top, zero);
  const int DC = _mm_cvtsi128_si32(sum) + 4;
-  Put8x8uv_SSE2(DC >> 3, dst);
+  Put8x8uv(DC >> 3, dst);
 }

-static void DC8uvNoTop_SSE2(uint8_t* dst) {  // DC with no top samples
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  int dc0 = 4;
  int i;
  for (i = 0; i < 8; ++i) {
    dc0 += dst[-1 + i * BPS];
  }
-  Put8x8uv_SSE2(dc0 >> 3, dst);
+  Put8x8uv(dc0 >> 3, dst);
 }

-static void DC8uvNoTopLeft_SSE2(uint8_t* dst) {    // DC with nothing
-  Put8x8uv_SSE2(0x80, dst);
+static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
+  Put8x8uv(0x80, dst);
 }

 //------------------------------------------------------------------------------
@@ -1178,46 +1181,47 @@ static void DC8uvNoTopLeft_SSE2(uint8_t* dst) {    // DC with nothing
 extern void VP8DspInitSSE2(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
-  VP8Transform = Transform_SSE2;
-#if (USE_TRANSFORM_AC3 == 1)
-  VP8TransformAC3 = TransformAC3_SSE2;
+  VP8Transform = Transform;
+#if defined(USE_TRANSFORM_AC3)
+  VP8TransformAC3 = TransformAC3;
 #endif

-  VP8VFilter16 = VFilter16_SSE2;
-  VP8HFilter16 = HFilter16_SSE2;
-  VP8VFilter8 = VFilter8_SSE2;
-  VP8HFilter8 = HFilter8_SSE2;
-  VP8VFilter16i = VFilter16i_SSE2;
-  VP8HFilter16i = HFilter16i_SSE2;
-  VP8VFilter8i = VFilter8i_SSE2;
-  VP8HFilter8i = HFilter8i_SSE2;
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;

-  VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
-  VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
-  VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
-  VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;

-  VP8PredLuma4[1] = TM4_SSE2;
-  VP8PredLuma4[2] = VE4_SSE2;
-  VP8PredLuma4[4] = RD4_SSE2;
-  VP8PredLuma4[5] = VR4_SSE2;
-  VP8PredLuma4[6] = LD4_SSE2;
-  VP8PredLuma4[7] = VL4_SSE2;
+  VP8PredLuma4[1] = TM4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[5] = VR4;
+  VP8PredLuma4[6] = LD4;
+  VP8PredLuma4[7] = VL4;

-  VP8PredLuma16[0] = DC16_SSE2;
-  VP8PredLuma16[1] = TM16_SSE2;
-  VP8PredLuma16[2] = VE16_SSE2;
-  VP8PredLuma16[3] = HE16_SSE2;
-  VP8PredLuma16[4] = DC16NoTop_SSE2;
-  VP8PredLuma16[5] = DC16NoLeft_SSE2;
-  VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
+  VP8PredLuma16[0] = DC16;
+  VP8PredLuma16[1] = TM16;
+  VP8PredLuma16[2] = VE16;
+  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[4] = DC16NoTop;
+  VP8PredLuma16[5] = DC16NoLeft;
+  VP8PredLuma16[6] = DC16NoTopLeft;

-  VP8PredChroma8[0] = DC8uv_SSE2;
-  VP8PredChroma8[1] = TM8uv_SSE2;
-  VP8PredChroma8[2] = VE8uv_SSE2;
-  VP8PredChroma8[4] = DC8uvNoTop_SSE2;
-  VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
-  VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
+  VP8PredChroma8[0] = DC8uv;
+  VP8PredChroma8[1] = TM8uv;
+  VP8PredChroma8[2] = VE8uv;
+  VP8PredChroma8[3] = HE8uv;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+  VP8PredChroma8[6] = DC8uvNoTopLeft;
 }

 #else  // !WEBP_USE_SSE2
--- a/src/dsp/dec_sse41.c
+++ b/src/dsp/dec_sse41.c
@@ -11,15 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_SSE41)

 #include <smmintrin.h>
-#include "src/dec/vp8i_dec.h"
-#include "src/utils/utils.h"
+#include "../dec/vp8i_dec.h"
+#include "../utils/utils.h"

-static void HE16_SSE41(uint8_t* dst) {     // horizontal
+static void HE16(uint8_t* dst) {     // horizontal
  int j;
  const __m128i kShuffle3 = _mm_set1_epi8(3);
  for (j = 16; j > 0; --j) {
@@ -36,7 +36,7 @@ static void HE16_SSE41(uint8_t* dst) {     // horizontal
 extern void VP8DspInitSSE41(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
-  VP8PredLuma16[3] = HE16_SSE41;
+  VP8PredLuma16[3] = HE16;
 }

 #else  // !WEBP_USE_SSE41
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -15,10 +15,10 @@
 #define WEBP_DSP_DSP_H_

 #ifdef HAVE_CONFIG_H
-#include "src/webp/config.h"
+#include "../webp/config.h"
 #endif

-#include "src/webp/types.h"
+#include "../webp/types.h"

 #ifdef __cplusplus
 extern "C" {
@@ -38,22 +38,10 @@ extern "C" {
 # define LOCAL_GCC_PREREQ(maj, min) 0
 #endif

-#if defined(__clang__)
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-# define LOCAL_CLANG_PREREQ(maj, min) \
-    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_CLANG_VERSION 0
-# define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif
-
 #ifndef __has_builtin
 # define __has_builtin(x) 0
 #endif

-// for now, none of the optimizations below are available in emscripten
-#if !defined(EMSCRIPTEN)
-
 #if defined(_MSC_VER) && _MSC_VER > 1310 && \
    (defined(_M_X64) || defined(_M_IX86))
 #define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
@@ -80,17 +68,15 @@ extern "C" {
 #define WEBP_USE_AVX2
 #endif

-// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-// inline assembly would need to be modified for use with Native Client.
-#if (defined(__ARM_NEON__) || \
-     defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
-    !defined(__native_client__)
-#define WEBP_USE_NEON
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+#define WEBP_ANDROID_NEON  // Android targets that might support NEON
 #endif

-#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
-    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-#define WEBP_ANDROID_NEON  // Android targets that may have NEON
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
+     defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
+    !defined(__native_client__)
 #define WEBP_USE_NEON
 #endif

@@ -104,7 +90,7 @@ extern "C" {
 #define WEBP_USE_MIPS32
 #if (__mips_isa_rev >= 2)
 #define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
+#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
 #define WEBP_USE_MIPS_DSP_R2
 #endif
 #endif
@@ -114,24 +100,6 @@ extern "C" {
 #define WEBP_USE_MSA
 #endif

-#endif  /* EMSCRIPTEN */
-
-#ifndef WEBP_DSP_OMIT_C_CODE
-#define WEBP_DSP_OMIT_C_CODE 1
-#endif
-
-#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
-#define WEBP_NEON_OMIT_C_CODE 1
-#else
-#define WEBP_NEON_OMIT_C_CODE 0
-#endif
-
-#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
-#define WEBP_NEON_WORK_AROUND_GCC 1
-#else
-#define WEBP_NEON_WORK_AROUND_GCC 0
-#endif
-
 // This macro prevents thread_sanitizer from reporting known concurrent writes.
 #define WEBP_TSAN_IGNORE_FUNCTION
 #if defined(__has_feature)
@@ -161,18 +129,6 @@ extern "C" {
 #endif
 #endif

-// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-#if !defined(WEBP_SWAP_16BIT_CSP)
-#define WEBP_SWAP_16BIT_CSP 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) && \
-    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
-     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
 typedef enum {
  kSSE2,
  kSSE3,
@@ -187,7 +143,7 @@ typedef enum {
 } CPUFeature;
 // returns true if the CPU supports the feature.
 typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
+WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;

 //------------------------------------------------------------------------------
 // Init stub generator
@@ -315,7 +271,6 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
                                        int xo, int yo,  // center position
                                        int W, int H);   // plane dimension

-#if !defined(WEBP_REDUCE_SIZE)
 // This version is called with the guarantee that you can load 8 bytes and
 // 8 rows at offset src1 and src2
 typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
@@ -323,13 +278,10 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,

 extern VP8SSIMGetFunc VP8SSIMGet;         // unclipped / unchecked
 extern VP8SSIMGetClippedFunc VP8SSIMGetClipped;   // with clipping
-#endif

-#if !defined(WEBP_DISABLE_STATS)
 typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
                                         const uint8_t* src2, int len);
 extern VP8AccumulateSSEFunc VP8AccumulateSSE;
-#endif

 // must be called before using any of the above directly
 void VP8SSIMDspInit(void);
@@ -510,12 +462,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
 extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;

 // Plain-C implementation, as fall-back.
-extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
-                                          const uint8_t* src);
-extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
-                                          const uint8_t* src);
-extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
-extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
+extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
+                                         const uint8_t* src);
+extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
+                                         const uint8_t* src);
+extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
+extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);

 // Main entry calls:
 extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
@@ -581,29 +533,25 @@ void WebPMultRows(uint8_t* ptr, int stride,
                  int width, int num_rows, int inverse);

 // Plain-C versions, used as fallback by some implementations.
-void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
-                   int width, int inverse);
-void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
-
-#ifdef WORDS_BIGENDIAN
-// ARGB packing function: a/r/g/b input is rgba or bgra order.
-extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
-                            const uint8_t* g, const uint8_t* b, int len,
-                            uint32_t* out);
-#endif
-
-// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
-extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                           int len, int step, uint32_t* out);
-
-// This function returns true if src[i] contains a value different from 0xff.
-extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
-// This function returns true if src[4*i] contains a value different from 0xff.
-extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
+                  int width, int inverse);
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);

 // To be called first before using the above.
 void WebPInitAlphaProcessing(void);

+// ARGB packing function: a/r/g/b input is rgba or bgra order.
+extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
+                           const uint8_t* g, const uint8_t* b, int len,
+                           uint32_t* out);
+
+// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
+extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                          int len, int step, uint32_t* out);
+
+// To be called first before using the above.
+void VP8EncDspARGBInit(void);
+
 //------------------------------------------------------------------------------
 // Filter functions

--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -14,18 +14,16 @@
 #include <assert.h>
 #include <stdlib.h>  // for abs()

-#include "src/dsp/dsp.h"
-#include "src/enc/vp8i_enc.h"
+#include "./dsp.h"
+#include "../enc/vp8i_enc.h"

 static WEBP_INLINE uint8_t clip_8b(int v) {
  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
 }

-#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int clip_max(int v, int max) {
  return (v > max) ? max : v;
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
@@ -58,10 +56,9 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
  histo->last_non_zero = last_non_zero;
 }

-#if !WEBP_NEON_OMIT_C_CODE
-static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
-                               int start_block, int end_block,
-                               VP8Histogram* const histo) {
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  for (j = start_block; j < end_block; ++j) {
@@ -79,7 +76,6 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
  }
  VP8SetHistogramData(distribution, histo);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // run-time tables (~4k)
@@ -104,8 +100,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)

-#if !WEBP_NEON_OMIT_C_CODE
-
 #define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

@@ -146,15 +140,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
  }
 }

-static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                         int do_two) {
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
 }

-static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int i;
  int tmp[16];
  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
@@ -182,16 +176,13 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
-                          int16_t* out) {
+static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  VP8FTransform(src, ref, out);
  VP8FTransform(src + 4, ref + 4, out + 16);
 }

-#if !WEBP_NEON_OMIT_C_CODE
-static void FTransformWHT_C(const int16_t* in, int16_t* out) {
+static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
  int i;
@@ -220,7 +211,6 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
    out[12 + i] = b3 >> 1;
  }
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 #undef MUL
 #undef STORE
@@ -313,8 +303,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // Chroma 8x8 prediction (paragraph 12.2)

-static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
-                               const uint8_t* top) {
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {
  // U block
  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
  VerticalPred(C8VE8 + dst, top, 8);
@@ -333,8 +323,8 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // luma 16x16 prediction (paragraph 12.3)

-static void Intra16Preds_C(uint8_t* dst,
-                           const uint8_t* left, const uint8_t* top) {
+static void Intra16Preds(uint8_t* dst,
+                         const uint8_t* left, const uint8_t* top) {
  DCMode(I16DC16 + dst, left, top, 16, 16, 5);
  VerticalPred(I16VE16 + dst, top, 16);
  HorizontalPred(I16HE16 + dst, left, 16);
@@ -517,7 +507,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {

 // Left samples are top[-5 .. -2], top_left is top[-1], top are
 // located at top[0..3], and top right is top[4..7]
-static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
  DC4(I4DC4 + dst, top);
  TM4(I4TM4 + dst, top);
  VE4(I4VE4 + dst, top);
@@ -533,7 +523,6 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
 //------------------------------------------------------------------------------
 // Metric

-#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
                              int w, int h) {
  int count = 0;
@@ -549,21 +538,20 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
  return count;
 }

-static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 16);
 }
-static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 8);
 }
-static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 8, 8);
 }
-static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 4, 4);
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

-static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
+static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
  int k, x, y;
  for (k = 0; k < 4; ++k) {
    uint32_t avg = 0;
@@ -583,7 +571,6 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
 // We try to match the spectral content (weighted) between source and
 // reconstructed samples.

-#if !WEBP_NEON_OMIT_C_CODE
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
@@ -621,25 +608,24 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
  return sum;
 }

-static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
  const int sum1 = TTransform(a, w);
  const int sum2 = TTransform(b, w);
  return abs(sum2 - sum1) >> 5;
 }

-static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
-                        const uint16_t* const w) {
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
  int D = 0;
  int x, y;
  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
    for (x = 0; x < 16; x += 4) {
-      D += Disto4x4_C(a + x + y, b + x + y, w);
+      D += Disto4x4(a + x + y, b + x + y, w);
    }
  }
  return D;
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // Quantization
@@ -650,8 +636,8 @@ static const uint8_t kZigzag[16] = {
 };

 // Simple quantization
-static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
-                           const VP8Matrix* const mtx) {
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
  int last = -1;
  int n;
  for (n = 0; n < 16; ++n) {
@@ -676,15 +662,13 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
  return (last >= 0);
 }

-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
-                             const VP8Matrix* const mtx) {
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
  int nz;
  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
-#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

 //------------------------------------------------------------------------------
 // Block copy
@@ -698,14 +682,148 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
  }
 }

-static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
+static void Copy4x4(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 4, 4);
 }

-static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
+static void Copy16x8(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 16, 8);
 }

+//------------------------------------------------------------------------------
+// SSIM / PSNR
+
+// hat-shaped filter. Sum of coefficients is equal to 16.
+static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = {
+  1, 2, 3, 4, 3, 2, 1
+};
+static const uint32_t kWeightSum = 16 * 16;   // sum{kWeight}^2
+
+static WEBP_INLINE double SSIMCalculation(
+    const VP8DistoStats* const stats, uint32_t N  /*num samples*/) {
+  const uint32_t w2 =  N * N;
+  const uint32_t C1 = 20 * w2;  // added to the mean-product terms below
+  const uint32_t C2 = 60 * w2;  // added to the sxy and sxx + syy terms below
+  const uint32_t C3 = 8 * 8 * w2;   // 'dark' limit ~= 6
+  const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
+  const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
+  if (xmxm + ymym >= C3) {
+    const int64_t xmym = (int64_t)stats->xm * stats->ym;
+    const int64_t sxy = (int64_t)stats->xym * N - xmym;    // can be negative
+    const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
+    const uint64_t syy = (uint64_t)stats->yym * N - ymym;
+    // descale by 8 bits (>> 8) to prevent overflow in the fnum/fden multiply.
+    const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8;
+    const uint64_t den_S = (sxx + syy + C2) >> 8;
+    const uint64_t fnum = (2 * xmym + C1) * num_S;
+    const uint64_t fden = (xmxm + ymym + C1) * den_S;
+    const double r = (double)fnum / fden;
+    assert(r >= 0. && r <= 1.0);
+    return r;
+  }
+  return 1.;   // area is too dark to contribute meaningfully
+}
+
+double VP8SSIMFromStats(const VP8DistoStats* const stats) {
+  return SSIMCalculation(stats, kWeightSum);
+}
+
+double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
+  return SSIMCalculation(stats, stats->w);
+}
+
+static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
+                               const uint8_t* src2, int stride2,
+                               int xo, int yo, int W, int H) {
+  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };  // all accumulators start at 0
+  const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;
+  const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
+                                                  : yo + VP8_SSIM_KERNEL;
+  const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;
+  const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
+                                                  : xo + VP8_SSIM_KERNEL;
+  int x, y;
+  src1 += ymin * stride1;  // move to the first row of the clipped window
+  src2 += ymin * stride2;
+  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
+    for (x = xmin; x <= xmax; ++x) {
+      const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]
+                       * kWeight[VP8_SSIM_KERNEL + y - yo];
+      const uint32_t s1 = src1[x];
+      const uint32_t s2 = src2[x];
+      stats.w   += w;  // total weight of the samples actually visited
+      stats.xm  += w * s1;
+      stats.ym  += w * s2;
+      stats.xxm += w * s1 * s1;
+      stats.xym += w * s1 * s2;
+      stats.yym += w * s2 * s2;
+    }
+  }
+  return VP8SSIMFromStatsClipped(&stats);  // normalizes by stats.w
+}
+
+static double SSIMGet_C(const uint8_t* src1, int stride1,
+                        const uint8_t* src2, int stride2) {
+  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
+  int x, y;
+  // full window starting at src1/src2: no bounds are checked in this version
+  for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
+    for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {
+      const uint32_t w = kWeight[x] * kWeight[y];
+      const uint32_t s1 = src1[x];
+      const uint32_t s2 = src2[x];
+      stats.xm  += w * s1;
+      stats.ym  += w * s2;
+      stats.xxm += w * s1 * s1;
+      stats.xym += w * s1 * s2;
+      stats.yym += w * s2 * s2;
+    }
+  }
+  return VP8SSIMFromStats(&stats);  // stats.w is left at 0; kWeightSum is used
+}
+
+//------------------------------------------------------------------------------
+
+static uint32_t AccumulateSSE(const uint8_t* src1,
+                              const uint8_t* src2, int len) {
+  int i;
+  uint32_t sse2 = 0;  // running sum of squared differences
+  assert(len <= 65535);  // to ensure that accumulation fits within uint32_t
+  for (i = 0; i < len; ++i) {
+    const int32_t diff = src1[i] - src2[i];  // signed, within [-255, 255]
+    sse2 += diff * diff;
+  }
+  return sse2;
+}
+
+//------------------------------------------------------------------------------
+
+VP8SSIMGetFunc VP8SSIMGet;
+VP8SSIMGetClippedFunc VP8SSIMGetClipped;
+VP8AccumulateSSEFunc VP8AccumulateSSE;
+
+extern void VP8SSIMDspInitSSE2(void);
+
+static volatile VP8CPUInfo ssim_last_cpuinfo_used =
+    (VP8CPUInfo)&ssim_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
+  if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;  // already set up
+
+  VP8SSIMGetClipped = SSIMGetClipped_C;  // plain-C defaults
+  VP8SSIMGet = SSIMGet_C;
+
+  VP8AccumulateSSE = AccumulateSSE;
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8SSIMDspInitSSE2();  // presumably installs SSE2 versions -- confirm
+    }
+#endif
+  }
+
+  ssim_last_cpuinfo_used = VP8GetCPUInfo;  // remember the VP8GetCPUInfo used
+}
+
 //------------------------------------------------------------------------------
 // Initialization

@@ -750,32 +868,26 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
  InitTables();

  // default C implementations
-#if !WEBP_NEON_OMIT_C_CODE
-  VP8ITransform = ITransform_C;
-  VP8FTransform = FTransform_C;
-  VP8FTransformWHT = FTransformWHT_C;
-  VP8TDisto4x4 = Disto4x4_C;
-  VP8TDisto16x16 = Disto16x16_C;
-  VP8CollectHistogram = CollectHistogram_C;
-  VP8SSE16x16 = SSE16x16_C;
-  VP8SSE16x8 = SSE16x8_C;
-  VP8SSE8x8 = SSE8x8_C;
-  VP8SSE4x4 = SSE4x4_C;
-#endif
-
-#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
-  VP8EncQuantizeBlock = QuantizeBlock_C;
-  VP8EncQuantize2Blocks = Quantize2Blocks_C;
-#endif
-
-  VP8FTransform2 = FTransform2_C;
-  VP8EncPredLuma4 = Intra4Preds_C;
-  VP8EncPredLuma16 = Intra16Preds_C;
-  VP8EncPredChroma8 = IntraChromaPreds_C;
-  VP8Mean16x4 = Mean16x4_C;
-  VP8EncQuantizeBlockWHT = QuantizeBlock_C;
-  VP8Copy4x4 = Copy4x4_C;
-  VP8Copy16x8 = Copy16x8_C;
+  VP8CollectHistogram = CollectHistogram;
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+  VP8FTransform2 = FTransform2;
+  VP8FTransformWHT = FTransformWHT;
+  VP8EncPredLuma4 = Intra4Preds;
+  VP8EncPredLuma16 = Intra16Preds;
+  VP8EncPredChroma8 = IntraChromaPreds;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE4x4 = SSE4x4;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8Mean16x4 = Mean16x4;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8EncQuantizeBlockWHT = QuantizeBlock;
+  VP8Copy4x4 = Copy4x4;
+  VP8Copy16x8 = Copy16x8;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  if (VP8GetCPUInfo != NULL) {
@@ -794,6 +906,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
      VP8EncDspInitAVX2();
    }
 #endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8EncDspInitNEON();
+    }
+#endif
 #if defined(WEBP_USE_MIPS32)
    if (VP8GetCPUInfo(kMIPS32)) {
      VP8EncDspInitMIPS32();
@@ -810,34 +927,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
    }
 #endif
  }
-
-#if defined(WEBP_USE_NEON)
-  if (WEBP_NEON_OMIT_C_CODE ||
-      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-    VP8EncDspInitNEON();
-  }
-#endif
-
-  assert(VP8ITransform != NULL);
-  assert(VP8FTransform != NULL);
-  assert(VP8FTransformWHT != NULL);
-  assert(VP8TDisto4x4 != NULL);
-  assert(VP8TDisto16x16 != NULL);
-  assert(VP8CollectHistogram != NULL);
-  assert(VP8SSE16x16 != NULL);
-  assert(VP8SSE16x8 != NULL);
-  assert(VP8SSE8x8 != NULL);
-  assert(VP8SSE4x4 != NULL);
-  assert(VP8EncQuantizeBlock != NULL);
-  assert(VP8EncQuantize2Blocks != NULL);
-  assert(VP8FTransform2 != NULL);
-  assert(VP8EncPredLuma4 != NULL);
-  assert(VP8EncPredLuma16 != NULL);
-  assert(VP8EncPredChroma8 != NULL);
-  assert(VP8Mean16x4 != NULL);
-  assert(VP8EncQuantizeBlockWHT != NULL);
-  assert(VP8Copy4x4 != NULL);
-  assert(VP8Copy16x8 != NULL);
-
  enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
--- a/src/dsp/enc_avx2.c
+++ b/src/dsp/enc_avx2.c
@@ -9,7 +9,7 @@
 //
 // AVX2 version of speed-critical encoding functions.

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_AVX2)

--- a/src/dsp/enc_mips32.c
+++ b/src/dsp/enc_mips32.c
@@ -13,13 +13,13 @@
 //            Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 //            Slobodan Prijic  (slobodan.prijic@imgtec.com)

-#include "src/dsp/dsp.h"
+#include "./dsp.h"

 #if defined(WEBP_USE_MIPS32)

-#include "src/dsp/mips_macro.h"
-#include "src/enc/vp8i_enc.h"
-#include "src/enc/cost_enc.h"
+#include "./mips_macro.h"
+#include "../enc/vp8i_enc.h"
+#include "../enc/cost_enc.h"

 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
@@ -113,9 +113,8 @@ static const int kC2 = 35468;
  "sb      %[" #TEMP12 "],   3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"

 // Does one or two inverse transforms.
-static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
-                                             const int16_t* in,
-                                             uint8_t* dst) {
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
  int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
  int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
@@ -145,11 +144,11 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
  );
 }

-static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
-                              uint8_t* dst, int do_two) {
-  ITransformOne_MIPS32(ref, in, dst);
+static void ITransform(const uint8_t* ref, const int16_t* in,
+                       uint8_t* dst, int do_two) {  // runs ITransformOne once, or twice if do_two
+  ITransformOne(ref, in, dst);  // first block, always processed
  if (do_two) {
-    ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
+    ITransformOne(ref + 4, in + 16, dst + 4);  // second block: +4 bytes in ref/dst, +16 coeffs in 'in'
  }
 }

@@ -188,8 +187,8 @@ static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
  "sh           %[temp5],       " #J "(%[ppin])                     \n\t"   \
  "sh           %[level],       " #N "(%[pout])                     \n\t"

-static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
-                                const VP8Matrix* const mtx) {
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  int sign, coeff, level, i;
  int max_level = MAX_LEVEL;
@@ -239,11 +238,11 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
  return 0;
 }

-static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
-                                  const VP8Matrix* const mtx) {
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {  // quantizes two consecutive 16-coeff blocks
  int nz;
-  nz  = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
-  nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;  // bit 0: result for the first block
+  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;  // bit 1: result for the second block
  return nz;
 }

@@ -362,8 +361,8 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
  "msub   %[temp6],  %[temp0]                \n\t"                \
  "msub   %[temp7],  %[temp1]                \n\t"

-static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
-                           const uint16_t* const w) {
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
  int tmp[32];
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;

@@ -397,13 +396,13 @@ static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
 #undef VERTICAL_PASS
 #undef HORIZONTAL_PASS

-static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
-                             const uint16_t* const w) {
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
  int D = 0;
  int x, y;
  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
    for (x = 0; x < 16; x += 4) {
-      D += Disto4x4_MIPS32(a + x + y, b + x + y, w);
+      D += Disto4x4(a + x + y, b + x + y, w);
    }
  }
  return D;
@@ -479,8 +478,7 @@ static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
  "sh     %[" #TEMP8 "],  " #D "(%[temp20])              \n\t"    \
  "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"

-static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
-                              int16_t* out) {
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
  int temp17, temp18, temp19, temp20;
@@ -541,7 +539,7 @@ static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
  GET_SSE_INNER(C, C + 1, C + 2, C + 3)   \
  GET_SSE_INNER(D, D + 1, D + 2, D + 3)

-static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  int count;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@@ -575,7 +573,7 @@ static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
  return count;
 }

-static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  int count;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@@ -601,7 +599,7 @@ static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
  return count;
 }

-static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  int count;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@@ -623,7 +621,7 @@ static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
  return count;
 }

-static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  int count;
  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@@ -653,20 +651,17 @@ static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
 extern void VP8EncDspInitMIPS32(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
-  VP8ITransform = ITransform_MIPS32;
-  VP8FTransform = FTransform_MIPS32;
-
-  VP8EncQuantizeBlock = QuantizeBlock_MIPS32;
-  VP8EncQuantize2Blocks = Quantize2Blocks_MIPS32;
-
-  VP8TDisto4x4 = Disto4x4_MIPS32;
-  VP8TDisto16x16 = Disto16x16_MIPS32;
-
+  VP8ITransform = ITransform;  // point the encoder hooks at this file's static functions
+  VP8FTransform = FTransform;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
 #if !defined(WORK_AROUND_GCC)
-  VP8SSE16x16 = SSE16x16_MIPS32;
-  VP8SSE8x8 = SSE8x8_MIPS32;
-  VP8SSE16x8 = SSE16x8_MIPS32;
-  VP8SSE4x4 = SSE4x4_MIPS32;
+  VP8SSE16x16 = SSE16x16;  // the four SSE hooks are left untouched when WORK_AROUND_GCC is defined
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE4x4 = SSE4x4;
 #endif
 }

--- a/Show More
+++ b/Show More