mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-15 21:39:59 +02:00
Compare commits
46 Commits
0.6.1
...
portable-i
Author | SHA1 | Date | |
---|---|---|---|
0d7614fddf | |||
88692490a5 | |||
0af22e17d6 | |||
08af967025 | |||
a26996116f | |||
5505a5b107 | |||
8ed24a564c | |||
0e8c3004be | |||
6fcc3a720d | |||
2371724d79 | |||
9d1e151bf5 | |||
a1ab868427 | |||
0b8ecc8cfa | |||
c646241391 | |||
cfaebe3e95 | |||
c0eb3ff7d4 | |||
415b98ffad | |||
09bcd9a397 | |||
e83df9d208 | |||
3387fb6fa6 | |||
599bddb658 | |||
28fbe808b9 | |||
c396e6701b | |||
96ef09107c | |||
bc01db116f | |||
d2adc08095 | |||
d6f90a3d83 | |||
cd01fc3944 | |||
9a1a3aa827 | |||
9eceff25c0 | |||
fe6184d706 | |||
cb6c3a2a36 | |||
ec666c7526 | |||
bafa90ccd8 | |||
e6e3ec335c | |||
168a3a9e28 | |||
ad4ca27449 | |||
3a5528713b | |||
b4cefba731 | |||
440945ca57 | |||
a37a7b00d5 | |||
a604ab5600 | |||
b005d916f8 | |||
586eda373d | |||
4026e34e3f | |||
4b21971337 |
@ -55,6 +55,9 @@ dsp_dec_srcs := \
|
||||
src/dsp/alpha_processing_neon.$(NEON) \
|
||||
src/dsp/alpha_processing_sse2.c \
|
||||
src/dsp/alpha_processing_sse41.c \
|
||||
src/dsp/argb.c \
|
||||
src/dsp/argb_mips_dsp_r2.c \
|
||||
src/dsp/argb_sse2.c \
|
||||
src/dsp/cpu.c \
|
||||
src/dsp/dec.c \
|
||||
src/dsp/dec_clip_tables.c \
|
||||
|
141
CMakeLists.txt
141
CMakeLists.txt
@ -4,17 +4,17 @@ project(libwebp C)
|
||||
|
||||
# Options for coder / decoder executables.
|
||||
option(WEBP_ENABLE_SIMD "Enable any SIMD optimization." ON)
|
||||
option(WEBP_ENABLE_WASM "Enable WebAssembly optimizations." OFF)
|
||||
option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." OFF)
|
||||
option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." OFF)
|
||||
option(WEBP_BUILD_GIF2WEBP "Build the gif2webp conversion tool." OFF)
|
||||
option(WEBP_BUILD_IMG2WEBP "Build the img2webp animation tool." OFF)
|
||||
option(WEBP_BUILD_WEBPINFO "Build the webpinfo command line tool." OFF)
|
||||
option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
|
||||
option(WEBP_ENABLE_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
|
||||
option(WEBP_EXPERIMENTAL_FEATURES "Build with experimental features." OFF)
|
||||
option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces." OFF)
|
||||
|
||||
if(WEBP_BUILD_WEBP_JS)
|
||||
if(WEBP_BUILD_WEBP_JS OR WEBP_ENABLE_WASM)
|
||||
set(WEBP_ENABLE_SIMD OFF)
|
||||
endif()
|
||||
|
||||
@ -27,14 +27,19 @@ if(NOT CMAKE_BUILD_TYPE)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Include dependencies.
|
||||
include(cmake/deps.cmake)
|
||||
include(GNUInstallDirs)
|
||||
include(cmake/config.h.cmake)
|
||||
|
||||
# Extract the version of the library.
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/configure.ac SOURCE_FILE)
|
||||
string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE})
|
||||
|
||||
################################################################################
|
||||
# Options.
|
||||
if(WEBP_ENABLE_SWAP_16BIT_CSP)
|
||||
add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
|
||||
add_definitions(-DWEBP_SWAP_16BIT_CSP)
|
||||
endif()
|
||||
if(WEBP_ENABLE_WASM)
|
||||
add_definitions(-DWEBP_USE_WASM)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
@ -49,10 +54,7 @@ if(ANDROID)
|
||||
set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS}
|
||||
${ANDROID_NDK}/sources/android/cpufeatures
|
||||
)
|
||||
add_definitions(-DHAVE_CPU_FEATURES_H=1)
|
||||
set(HAVE_CPU_FEATURES_H 1)
|
||||
else()
|
||||
set(HAVE_CPU_FEATURES_H 0)
|
||||
add_definitions(-DHAVE_CPU_FEATURES_H)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
@ -104,13 +106,8 @@ endforeach()
|
||||
|
||||
### Define the mandatory libraries.
|
||||
# Build the webpdecoder library.
|
||||
if(MSVC)
|
||||
# avoid security warnings for e.g., fopen() used in the examples.
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
else()
|
||||
add_definitions(-Wall)
|
||||
endif()
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${WEBP_DEP_INCLUDE_DIRS})
|
||||
add_definitions(-Wall)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ ${WEBP_DEP_INCLUDE_DIRS})
|
||||
add_library(webpdecode OBJECT ${WEBP_DEC_SRCS})
|
||||
add_library(webpdspdecode OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS})
|
||||
add_library(webputilsdecode OBJECT ${WEBP_UTILS_COMMON_SRCS}
|
||||
@ -121,32 +118,13 @@ target_link_libraries(webpdecoder ${WEBP_DEP_LIBRARIES})
|
||||
|
||||
# Build the webp library.
|
||||
add_library(webpencode OBJECT ${WEBP_ENC_SRCS})
|
||||
target_include_directories(
|
||||
webpencode PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
add_library(webpdsp OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS}
|
||||
${WEBP_DSP_ENC_SRCS})
|
||||
target_include_directories(webpdsp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
${WEBP_DSP_ENC_SRCS})
|
||||
add_library(webputils OBJECT ${WEBP_UTILS_COMMON_SRCS} ${WEBP_UTILS_DEC_SRCS}
|
||||
${WEBP_UTILS_ENC_SRCS})
|
||||
target_include_directories(webputils PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
${WEBP_UTILS_ENC_SRCS})
|
||||
add_library(webp $<TARGET_OBJECTS:webpdecode> $<TARGET_OBJECTS:webpdsp>
|
||||
$<TARGET_OBJECTS:webpencode> $<TARGET_OBJECTS:webputils>)
|
||||
if(XCODE)
|
||||
libwebp_add_stub_file(webp)
|
||||
endif()
|
||||
$<TARGET_OBJECTS:webpencode> $<TARGET_OBJECTS:webputils>)
|
||||
target_link_libraries(webp ${WEBP_DEP_LIBRARIES})
|
||||
target_include_directories(
|
||||
webp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
set_target_properties(
|
||||
webp
|
||||
PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/encode.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")
|
||||
|
||||
# Make sure the OBJECT libraries are built with position independent code
|
||||
# (it is not ON by default).
|
||||
@ -156,17 +134,6 @@ set_target_properties(webpdecode webpdspdecode webputilsdecode
|
||||
# Build the webp demux library.
|
||||
add_library(webpdemux ${WEBP_DEMUX_SRCS})
|
||||
target_link_libraries(webpdemux webp)
|
||||
target_include_directories(
|
||||
webpdemux PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
|
||||
PUBLIC $<INSTALL_INTERFACE:include>)
|
||||
set_target_properties(
|
||||
webpdemux
|
||||
PROPERTIES
|
||||
PUBLIC_HEADER
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/demux.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h")
|
||||
|
||||
# Set the version numbers.
|
||||
function(parse_version FILE NAME VAR)
|
||||
@ -178,13 +145,13 @@ function(parse_version FILE NAME VAR)
|
||||
set(${VAR} "${VERSION}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
parse_version(Makefile.am webp WEBP_WEBP_SOVERSION)
|
||||
set_target_properties(webp PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
set_target_properties(webp PROPERTIES VERSION ${WEBP_VERSION}
|
||||
SOVERSION ${WEBP_WEBP_SOVERSION})
|
||||
parse_version(Makefile.am webpdecoder WEBP_DECODER_SOVERSION)
|
||||
set_target_properties(webpdecoder PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
set_target_properties(webpdecoder PROPERTIES VERSION ${WEBP_VERSION}
|
||||
SOVERSION ${WEBP_DECODER_SOVERSION})
|
||||
parse_version(demux/Makefile.am webpdemux WEBP_DEMUX_SOVERSION)
|
||||
set_target_properties(webpdemux PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
set_target_properties(webpdemux PROPERTIES VERSION ${WEBP_VERSION}
|
||||
SOVERSION ${WEBP_DEMUX_SOVERSION})
|
||||
|
||||
# Define the libraries to install.
|
||||
@ -200,9 +167,11 @@ math(EXPR WEBP_SIMD_FILES_TO_INCLUDE_RANGE
|
||||
foreach(I_FILE RANGE ${WEBP_SIMD_FILES_TO_INCLUDE_RANGE})
|
||||
list(GET WEBP_SIMD_FILES_TO_INCLUDE ${I_FILE} FILE)
|
||||
list(GET WEBP_SIMD_FLAGS_TO_INCLUDE ${I_FILE} SIMD_COMPILE_FLAG)
|
||||
set_source_files_properties(${FILE} PROPERTIES
|
||||
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
|
||||
)
|
||||
if(NOT ${SIMD_COMPILE_FLAG} STREQUAL "NOTFOUND")
|
||||
set_source_files_properties(${FILE} PROPERTIES
|
||||
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
|
||||
)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Build the executables if asked for.
|
||||
@ -231,10 +200,6 @@ if(WEBP_BUILD_CWEBP OR WEBP_BUILD_DWEBP OR
|
||||
"imageenc_[^ ]*")
|
||||
add_library(imageenc ${IMAGEENC_SRCS})
|
||||
target_link_libraries(imageenc webp)
|
||||
|
||||
set_property(TARGET exampleutil imageioutil imagedec imageenc
|
||||
PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_DWEBP)
|
||||
@ -245,8 +210,6 @@ if(WEBP_BUILD_DWEBP)
|
||||
add_executable(dwebp ${DWEBP_SRCS})
|
||||
target_link_libraries(dwebp exampleutil imagedec imageenc webpdecoder)
|
||||
install(TARGETS dwebp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET dwebp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_CWEBP)
|
||||
@ -257,12 +220,6 @@ if(WEBP_BUILD_CWEBP)
|
||||
add_executable(cwebp ${CWEBP_SRCS})
|
||||
target_link_libraries(cwebp exampleutil imagedec webp)
|
||||
install(TARGETS cwebp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET cwebp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_GIF2WEBP AND NOT GIF_FOUND)
|
||||
unset(WEBP_BUILD_GIF2WEBP CACHE)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
|
||||
@ -271,13 +228,8 @@ if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
|
||||
add_library(webpmux ${WEBP_MUX_SRCS})
|
||||
target_link_libraries(webpmux webp)
|
||||
parse_version(mux/Makefile.am webpmux WEBP_MUX_SOVERSION)
|
||||
set_target_properties(webpmux PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
set_target_properties(webpmux PROPERTIES VERSION ${WEBP_VERSION}
|
||||
SOVERSION ${WEBP_MUX_SOVERSION})
|
||||
set_target_properties(
|
||||
webpmux
|
||||
PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h;\
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h;")
|
||||
list(APPEND INSTALLED_LIBRARIES webpmux)
|
||||
endif()
|
||||
|
||||
@ -290,8 +242,6 @@ if(WEBP_BUILD_GIF2WEBP)
|
||||
target_link_libraries(gif2webp exampleutil imageioutil webp webpmux
|
||||
${WEBP_DEP_GIF_LIBRARIES})
|
||||
install(TARGETS gif2webp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET gif2webp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_IMG2WEBP)
|
||||
@ -302,8 +252,6 @@ if(WEBP_BUILD_IMG2WEBP)
|
||||
add_executable(img2webp ${IMG2WEBP_SRCS})
|
||||
target_link_libraries(img2webp exampleutil imagedec imageioutil webp webpmux)
|
||||
install(TARGETS img2webp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET img2webp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if (WEBP_BUILD_WEBPINFO)
|
||||
@ -314,8 +262,6 @@ if (WEBP_BUILD_WEBPINFO)
|
||||
add_executable(webpinfo ${WEBPINFO_SRCS})
|
||||
target_link_libraries(webpinfo exampleutil imageioutil)
|
||||
install(TARGETS webpinfo RUNTIME DESTINATION bin)
|
||||
set_property(TARGET webpinfo PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_WEBP_JS)
|
||||
@ -323,7 +269,6 @@ if(WEBP_BUILD_WEBP_JS)
|
||||
add_executable(webp_js
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/extras/webp_to_sdl.c)
|
||||
target_link_libraries(webp_js webpdecoder SDL)
|
||||
set(WEBP_HAVE_SDL 1)
|
||||
set_target_properties(webp_js PROPERTIES LINK_FLAGS
|
||||
"-s EXPORTED_FUNCTIONS='[\"_WebpToSDL\"]' -s INVOKE_RUN=0")
|
||||
set_target_properties(webp_js PROPERTIES OUTPUT_NAME webp)
|
||||
@ -341,33 +286,23 @@ if(WEBP_BUILD_WEBP_JS)
|
||||
target_compile_definitions(webpdecoder PUBLIC EMSCRIPTEN)
|
||||
endif()
|
||||
|
||||
# Generate the config.h file.
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/src/webp/config.h)
|
||||
add_definitions(-DHAVE_CONFIG_H)
|
||||
# The webp folder is included as we reference config.h as
|
||||
# ../webp/config.h or webp/config.h
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# Install the different headers and libraries.
|
||||
install(
|
||||
TARGETS ${INSTALLED_LIBRARIES}
|
||||
EXPORT WebPTargets
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/webp
|
||||
INCLUDES
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
set(ConfigPackageLocation ${CMAKE_INSTALL_DATADIR}/WebP/cmake/)
|
||||
install(EXPORT WebPTargets NAMESPACE WebP::
|
||||
DESTINATION ${ConfigPackageLocation})
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/demux.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/encode.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/mux_types.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h
|
||||
DESTINATION include/webp)
|
||||
install(TARGETS ${INSTALLED_LIBRARIES}
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
|
||||
# Create the CMake version file.
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/WebPConfigVersion.cmake"
|
||||
VERSION ${PACKAGE_VERSION}
|
||||
VERSION ${WEBP_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
@ -378,7 +313,7 @@ configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/WebPConfig.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/WebPConfig.cmake
|
||||
INSTALL_DESTINATION ${ConfigPackageLocation}
|
||||
PATH_VARS CMAKE_INSTALL_INCLUDEDIR)
|
||||
)
|
||||
|
||||
# Install the generated CMake files.
|
||||
install(
|
||||
|
285
ChangeLog
285
ChangeLog
@ -1,294 +1,9 @@
|
||||
f66955de WEBP_REDUCE_CSP: restrict colorspace support
|
||||
a289d8e7 update ChangeLog (tag: v0.6.1-rc2)
|
||||
c10a493c vwebp: disable double buffering on windows & mac
|
||||
0d4466c2 webp_to_sdl.c: fix file mode
|
||||
1b27bf8b WEBP_REDUCE_SIZE: disable all rescaler code
|
||||
126be109 webpinfo: add -version option
|
||||
9add62b5 bump version to 0.6.1
|
||||
d3e26144 update NEWS
|
||||
2edda639 README: add webpinfo section
|
||||
9ca568ef Merge "right-size some tables"
|
||||
31f1995c Merge "SSE2 implementation of HasAlphaXXX"
|
||||
a80c46bd SSE2 implementation of HasAlphaXXX
|
||||
083507f2 right-size some tables
|
||||
2e5785b2 anim_utils.c: remove warning when !defined(WEBP_HAVE_GIF)
|
||||
b299c47e add WEBP_REDUCE_SIZE
|
||||
f593d71a enc: disable pic->stats/extra_info w/WEBP_DISABLE_STATS
|
||||
541179a9 Merge "predictor_enc: fix build w/--disable-near-lossless"
|
||||
5755a7ec predictor_enc: fix build w/--disable-near-lossless
|
||||
eab5bab7 add WEBP_DISABLE_STATS
|
||||
8052c585 remove some petty TODOs from vwebp.
|
||||
c245343d move LOAD8x4 and STORE8x2 closer to their use location
|
||||
b9e734fd dec,cosmetics: normalize function naming style
|
||||
c188d546 dec: harmonize function suffixes
|
||||
28c5ac81 dec_sse41: harmonize function suffixes
|
||||
e65b72a3 Merge "introduce WebPHasAlpha8b and WebPHasAlpha32b"
|
||||
b94cee98 dec_sse2: remove HE8uv_SSE2
|
||||
44a0ee3f introduce WebPHasAlpha8b and WebPHasAlpha32b
|
||||
aebf59ac Merge "WebPPictureAllocARGB: align argb allocation"
|
||||
c184665e WebPPictureAllocARGB: align argb allocation
|
||||
3daf7509 WebPParseHeaders: remove obsolete animation TODO
|
||||
80285d97 cmake: avoid security warnings under msvc
|
||||
650eac55 cmake: don't set -Wall with MSVC
|
||||
c462cd00 Remove useless code.
|
||||
01a98217 Merge "remove WebPWorkerImpl declaration from the header"
|
||||
3c49fc47 Merge "thread_utils: fix potentially bad call to Execute"
|
||||
fde2782e thread_utils: fix potentially bad call to Execute
|
||||
2a270c1d remove WebPWorkerImpl declaration from the header
|
||||
f1f437cc remove mention of 'lossy-only parameters' from the doc
|
||||
3879074d Merge "WebPMemToUint32: remove ptr cast to int"
|
||||
04b029d2 WebPMemToUint32: remove ptr cast to int
|
||||
b7971d0e dsp: avoid defining _C functions w/NEON builds
|
||||
6ba98764 webpdec: correct alloc size check w/use_argb
|
||||
5cfb3b0f normalize include guards
|
||||
f433205e Merge changes Ia17c7dfc,I75423abb,Ia2f716b4,I161caa14,I4210081a, ...
|
||||
8d033b14 {dec,enc}_neon: harmonize function suffixes x2
|
||||
0295e981 upsampling_neon: harmonize function suffixes
|
||||
d572c4e5 yuv_neon: harmonize function suffixes
|
||||
ab9c2500 rescaler_neon: harmonize function suffixes
|
||||
93e0ce27 lossless_neon: harmonize function suffixes
|
||||
22fbc50e lossless_enc_neon: harmonize function suffixes
|
||||
447875b4 filters_neon,cosmetics: fix indent
|
||||
e51bdd43 remove unused VP8TokenToStats() function
|
||||
785da7ea enc_neon: harmonize function suffixes
|
||||
bc1a251f dec_neon: harmonize function suffixes
|
||||
61e535f1 dsp/lossless: workaround gcc-4.8 bug on arm
|
||||
68b2eab7 cwebp: fix alpha reporting w/lossless & metadata
|
||||
30042faa WebPDemuxGetI: add doc details around WebPFormatFeature
|
||||
0a17f471 Merge "WIP: list includes as descendants of the project dir"
|
||||
a4399721 WIP: list includes as descendants of the project dir
|
||||
08275708 Merge "Make sure we reach the full range for alpha blending."
|
||||
d361a6a7 yuv_sse2: harmonize function suffixes
|
||||
6921aa6f upsampling_sse2: harmonize function suffixes
|
||||
08c67d3e ssim_sse2: harmonize function suffixes
|
||||
582a1b57 rescaler_sse2: harmonize function suffixes
|
||||
2c1b18ba lossless_sse2: harmonize function suffixes
|
||||
0ac46e81 lossless_enc_sse2: harmonize function suffixes
|
||||
bc634d57 enc_sse2: harmonize function suffixes
|
||||
bcb7347c dec_sse2: harmonize function suffixes
|
||||
e14ad93c Make sure we reach the full range for alpha blending.
|
||||
7038ca8d demux,StoreFrame: restore hdr size check to min req
|
||||
fb3daad6 cpu: fix ssse3 check
|
||||
be590e06 Merge "Fix CMake redefinition for HAVE_CPU_FEATURES_H"
|
||||
35f736e1 Fix CMake redefinition for HAVE_CPU_FEATURES_H
|
||||
a5216efc Fix integer overflow warning.
|
||||
a9c8916b decode.h,WebPIDecGetRGB: clarify output ptr validity
|
||||
3c74c645 gif2webp: handle 1-frame case properly + fix anim_diff
|
||||
c7f295d3 Merge "gif2webp: introduce -loop_compatibility option"
|
||||
b4e04677 gif2webp: introduce -loop_compatibility option
|
||||
f78da3de add LOCAL_CLANG_PREREQ and avoid WORK_AROUND_GCC w/3.8+
|
||||
01c426f1 define WEBP_USE_INTRINSICS w/gcc-4.9+
|
||||
8635973d use sdl-config (if available) to determine the link flags
|
||||
e9459382 use CPPFLAGS before CFLAGS
|
||||
4a9d788e Merge "Android.mk,mips: fix clang build with r15"
|
||||
4fbdc9fb Android.mk,mips: fix clang build with r15
|
||||
a80fcc4a ifdef code not used by Chrome/Android.
|
||||
3993af12 Fix signed integer overflows.
|
||||
f66f94ef anim_dump: small tool to dump frames from animated WebP
|
||||
6eba857b Merge "rationalize the Makefile.am"
|
||||
c5e34fba function definition cleanup
|
||||
3822762a rationalize the Makefile.am
|
||||
501ef6e4 configure style fix: animdiff -> anim_diff
|
||||
f8bdc268 Merge "protect against NULL dump_folder[] value in ReadAnimatedImage()"
|
||||
23bfc652 protect against NULL dump_folder[] value in ReadAnimatedImage()
|
||||
8dc3d71b cosmetics,ReadAnimatedWebP: correct function comment
|
||||
5bd40066 Merge changes I66a64a0a,I4d2e520f
|
||||
7945575c cosmetics,webpinfo: remove an else after a return
|
||||
8729fa11 cosmetics,cwebp: remove an else after a return
|
||||
f324b7f9 cosmetics: normalize fn proto & decl param names
|
||||
869eb369 CMake cleanups.
|
||||
289e62a3 Remove declaration of unimplemented VP8ApplyNearLosslessPredict
|
||||
20a94186 pnmdec,PAM: validate depth before calculating bytes_per_px
|
||||
34130afe anim_encode: fix integer overflow
|
||||
42c79aa6 Merge "Encoder: harmonize function suffixes"
|
||||
b09307dc Encoder: harmonize function suffixes
|
||||
bed0456d Merge "SSIM: harmonize the function suffix"
|
||||
54f6a3cf lossless_sse2.c: fix some missed suffix changes
|
||||
088f1dcc SSIM: harmonize the function suffix
|
||||
86fc4dd9 webpdec: use ImgIoUtilCheckSizeArgumentsOverflow
|
||||
08ea9ecd imageio: add ability restrict max image size
|
||||
6f9daa4a jpegdec,ReadError: fix leaks on error
|
||||
a0f72a4f VP8LTransformColorFunc: drop an non-respected 'const' from the signature.
|
||||
8c934902 Merge "Lossess dec: harmonize the function suffixes"
|
||||
622242aa Lossess dec: harmonize the function suffixes
|
||||
1411f027 Lossless Enc: harmonize the function suffixes
|
||||
24ad2e3c add const to two variables
|
||||
46efe062 Merge "Allow the lossless cruncher to work for alpha."
|
||||
8c3f9a47 Speed-up LZ77.
|
||||
1aef4c71 Allow the lossless cruncher to work for alpha.
|
||||
b8821dbd Improve the box LZ77 speed.
|
||||
7beed280 add missing ()s to macro parameters
|
||||
6473d20b Merge "fix Android standalone toolchain build"
|
||||
dcefed95 Merge "build.gradle: fix arm64 build"
|
||||
0c83a8bc Merge "yuv: harmonize suffix naming"
|
||||
c6d1db4b fix Android standalone toolchain build
|
||||
663a6d9d unify the ALTERNATE_CODE flag usage
|
||||
73ea9f27 yuv: harmonize suffix naming
|
||||
c71b68ac build.gradle: fix arm64 build
|
||||
c4568b47 Rescaler: harmonize the suffix naming
|
||||
6cb13b05 Merge "alpha_processing: harmonize the naming suffixes to be _C()"
|
||||
83a3e69a Merge "simplify WEBP_EXTERN macro"
|
||||
7295fde2 Merge "filters: harmonize the suffixes naming to _SSE2(), _C(), etc."
|
||||
8e42ba4c simplify WEBP_EXTERN macro
|
||||
331ab34b cost*.c: harmonize the suffix namings
|
||||
b161f670 filters: harmonize the suffixes naming to _SSE2(), _C(), etc.
|
||||
dec5e4d3 alpha_processing: harmonize the naming suffixes to be _C()
|
||||
6878d427 fix memory leak in SDL_Init()
|
||||
461ae555 Merge "configure: fix warnings in sdl check"
|
||||
62486a22 configure: test for -Wundef
|
||||
92982609 dsp.h: fix -Wundef w/__mips_dsp_rev
|
||||
0265cede configure: fix warnings in sdl check
|
||||
88c73d8a backward_references_enc.h: fix WINDOW_SIZE_BITS check
|
||||
4ea49f6b rescaler_sse2.c: fix WEBP_RESCALER_FIX -> _RFIX typo
|
||||
1b526638 Clean-up some CMake
|
||||
87f57a4b Merge "cmake: fix gif lib detection when cross compiling"
|
||||
b34a9db1 cosmetics,dec_sse2: remove some redundant comments
|
||||
471c5755 cmake: fix gif lib detection when cross compiling
|
||||
c793417a cmake: disable gif2webp if gif lib isn't found
|
||||
dcbc1c88 cmake: split gif detection from IMG deps
|
||||
66ad84f0 Merge "muxread: remove unreachable code"
|
||||
50ec3ab7 muxread: remove unreachable code
|
||||
7d67a164 Lossy encoding: smoothen transparent areas to improve compression
|
||||
e50650c7 Merge "fix signature for DISABLE_TOKEN_BUFFER compilation"
|
||||
671d2567 fix signature for DISABLE_TOKEN_BUFFER compilation
|
||||
d6755580 cpu.cmake: use unique flag to test simd disable flags
|
||||
28914528 Merge "Remove the argb* files."
|
||||
8acb4942 Remove the argb* files.
|
||||
3b62347b README: correct cmake invocation note
|
||||
7ca0df13 Have the SSE2 version of PackARGB use common code.
|
||||
7b250459 Merge "Re-use the transformed image when trying several LZ77 in lossless."
|
||||
e132072f Re-use the transformed image when trying several LZ77 in lossless.
|
||||
5d7a50ef Get code to compile in C++.
|
||||
7b012987 configure: test for -Wparentheses-equality
|
||||
f0569adb Fix man pages for multi-threading.
|
||||
f1d5a397 multithread cruncher: only copy stats when picture->stats != NULL
|
||||
f8c2ac15 Multi-thread the lossless cruncher.
|
||||
a88c6522 Merge "Integrate a new LZ77 looking for matches in the neighborhood of a pixel only."
|
||||
8f6df1d0 Unroll Predictors 10, 11 and 12.
|
||||
355c3d1b Integrate a new LZ77 looking for matches in the neighborhood of a pixel only.
|
||||
a1779a01 Refactor LZ77 handling in preparation for a new method.
|
||||
67de68b5 Android.mk/build.gradle: fix mips build with clang from r14b
|
||||
f209a548 Use the plane code and not the distance when computing statistics.
|
||||
b903b80c Split cost-based backward references in its own file.
|
||||
498cad34 Cosmetic changes in backward reference.
|
||||
e4eb4587 lossless, VP8LTransformColor_C: make sure no overflow happens with colors.
|
||||
af6deaff webpinfo: handle alpha flag mismatch
|
||||
7caef29b Fix typo that creeped in.
|
||||
39e19f92 Merge "near lossless: fix unsigned int overflow warnings."
|
||||
9bbc0891 near lossless: fix unsigned int overflow warnings.
|
||||
e1118d62 Merge "cosmetics,FindClosestDiscretized: use uint in mask creation"
|
||||
186bc9b7 Merge "webpinfo: tolerate ALPH+VP8L"
|
||||
b5887297 cosmetics,FindClosestDiscretized: use uint in mask creation
|
||||
f1784aee near_lossless,FindClosestDiscretized: use unsigned ops
|
||||
0d20abb3 webpinfo: tolerate ALPH+VP8L
|
||||
972104b3 webpmux: tolerate false positive Alpha flag
|
||||
dd7e83cc tiffdec,ReadTIFF: ensure data_size is < tsize_t max
|
||||
d988eb7b tiffdec,MyRead: quiet -Wshorten-64-to-32 warning
|
||||
dabda707 webpinfo: add support to parse Alpha bitstream
|
||||
4c117643 webpinfo: correct background color output, BGRA->ARGB
|
||||
defc98d7 Doc: clarify the role of quality in WebPConfig.
|
||||
d78ff780 Merge "Fix code to compile with C++."
|
||||
c8f14093 Fix code to compile with C++.
|
||||
497dc6a7 pnmdec: sanitize invalid header output
|
||||
d78e5867 Merge "configure: test for -Wconstant-conversion"
|
||||
481e91eb Merge "pnmdec,PAM: set bytes_per_px based on depth when missing"
|
||||
93b12753 configure: test for -Wconstant-conversion
|
||||
645f0c53 pnmdec,PAM: set bytes_per_px based on depth when missing
|
||||
e9154605 Merge "vwebp: activate GLUT double-buffering"
|
||||
818d795b vwebp: activate GLUT double-buffering
|
||||
d63e6f4b Add a man page for webpinfo
|
||||
4d708435 Merge "NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV"
|
||||
faf42213 NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV
|
||||
b4d576fa Install man pages with CMake.
|
||||
cbc1b921 webpinfo: add features to parse bitstream header
|
||||
e644c556 Fix bad bit writer initialization.
|
||||
b62cdad2 Merge "Implement a cruncher for lossless at method 6."
|
||||
da3e4dfb use the exact constant for the gamma transfer function
|
||||
a9c701e0 Merge "tiffdec: fix EXTRASAMPLES check"
|
||||
adab8ce0 Implement a cruncher for lossless at method 6.
|
||||
1b92b237 Merge "Fix VP8ApplyNearLossless to respect const and stride."
|
||||
1923ff02 tiffdec: fix EXTRASAMPLES check
|
||||
97cce5ba tiffdec: only request EXTRASAMPLES w/> 3 samples/px
|
||||
0dcd85b6 Fix VP8ApplyNearLossless to respect const and stride.
|
||||
f7682189 yuv: rationalize the C/SSE2 function naming
|
||||
52245424 NEON implementation of some Sharp-YUV420 functions
|
||||
690efd82 Avoid several backward reference copies.
|
||||
4bb1f607 src/dec/vp8_dec.h, cosmetics: fix comments
|
||||
285748be cmake: build/install webpinfo
|
||||
78fd199c backward_references_enc.c: clear -Wshadow warnings
|
||||
ae836410 WebPLog2FloorC: clear -Wshadow warning
|
||||
d0b7404e Merge "WASM support"
|
||||
134e314f WASM support
|
||||
c08adb6f Merge "VP8LEnc: remove use of BitsLog2Ceiling()"
|
||||
28c37ebd VP8LEnc: remove use of BitsLog2Ceiling()
|
||||
2cb58ab2 webpinfo: output format as a human readable string
|
||||
bb175a93 Merge "rename some symbols clashing with MSVC headers"
|
||||
39eda658 Remove a duplicated pixel hash implementation.
|
||||
36b8274d rename some symbols clashing with MSVC headers
|
||||
274daf54 Add webpinfo tool.
|
||||
ec5036e4 add explicit reference to /usr/local/{lib,inc}
|
||||
18f0dfac Merge "fix TIFF encoder regarding rgbA/RGBA"
|
||||
4e2b0b50 Merge "webpdec.h: fix a doc typo"
|
||||
e2eeabff Merge "Install binaries, libraries and headers in CMake."
|
||||
836607e6 webpdec.h: fix a doc typo
|
||||
9273e441 fix TIFF encoder regarding rgbA/RGBA
|
||||
17e3c11f Add limited PAM decoding support
|
||||
5f624871 Install binaries, libraries and headers in CMake.
|
||||
976adac1 Merge "lossless incremental decoding: fix missing eos_ test"
|
||||
f8fad4fa lossless incremental decoding: fix missing eos_ test
|
||||
27415d41 Merge "vwebp_sdl: fix the makefile.unix"
|
||||
49566182 Merge "ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode"
|
||||
6f75a51b Analyze the transform entropy on the whole image.
|
||||
a5e4e3af Use palette only if we can in entropy analysis.
|
||||
75a9c3c4 Improve compression by better entropy analysis.
|
||||
39cf6f4f vwebp_sdl: fix the makefile.unix
|
||||
699b0416 ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode
|
||||
7d985bd1 Fix small entropy analysis bug.
|
||||
6e7caf06 Optimize the color cache size.
|
||||
833c9219 More efficient stochastic histogram merge.
|
||||
5183326b Refactor the greedy histogram merge.
|
||||
99f6f462 Merge "histogram_enc.c,MyRand: s/ul/u/ for unsigned constants"
|
||||
80a22186 ssim.c: remove dead include
|
||||
a128dfff histogram_enc.c,MyRand: s/ul/u/ for unsigned constants
|
||||
693bf74e move the SSIM calculation code in ssim.c / ssim_sse2.c
|
||||
10d791ca Merge "Fix the random generator in HistogramCombineStochastic."
|
||||
fa63a966 Fix the random generator in HistogramCombineStochastic.
|
||||
16be192f VP8LSetBitPos: remove the eos_ setting
|
||||
027151ca don't erase the surface before blitting.
|
||||
4105d565 disable WEBP_USE_XXX optimisations when EMSCRIPTEN is defined
|
||||
9ee32a75 Merge "WebP-JS: emscripten-based Javascript decoder"
|
||||
ca9f7b7d WebP-JS: emscripten-based Javascript decoder
|
||||
868aa690 Perform greedy histogram merge in a unified way.
|
||||
5b393f2d Merge "fix path typo for vwebp_sdl in Makefile.vc"
|
||||
e0012bea CMake: only use libwebpdecoder for building dwebp
|
||||
84c2a7b0 fix path typo for vwebp_sdl in Makefile.vc
|
||||
1b0e4abf Merge "Add a flag to disable SIMD optimizations."
|
||||
32263250 Add a flag to disable SIMD optimizations.
|
||||
b494fdec optimize the ARGB->ARGB Import to use memcpy
|
||||
f1536039 Merge "ReadWebP: decode directly into a pre-allocated buffer"
|
||||
e69ed291 ReadWebP: decode directly into a pre-allocated buffer
|
||||
57d8de8a Merge "vwebp_sdl: simple viewer based on SDL"
|
||||
5cfd4ebc LZ77 interval speedups. Faster, smaller, simpler.
|
||||
1e7ad88b PNM header decoder: add some basic numerical validation
|
||||
17c7890c Merge "Add a decoder only library for WebP in CMake."
|
||||
be733786 Merge "Add clang build fix for MSA"
|
||||
03cda0e4 Add a decoder only library for WebP in CMake.
|
||||
aa893914 Add clang build fix for MSA
|
||||
31a92e97 Merge "imageio: add limited PNM support for reading"
|
||||
dcf9d82a imageio: add limited PNM support for reading
|
||||
6524fcd6 vwebp_sdl: simple viewer based on SDL
|
||||
6cf24a24 get_disto: fix reference file read
|
||||
43d472aa Merge tag 'v0.6.0'
|
||||
50d1a848 update ChangeLog (tag: v0.6.0, origin/0.6.0, 0.6.0)
|
||||
20a7fea0 extras/Makefile.am: fix libwebpextras.la reference
|
||||
415f3ffe update ChangeLog (tag: v0.6.0-rc3)
|
||||
3c6d1224 update NEWS
|
||||
ee4a4141 update AUTHORS
|
||||
32ed856f Fix "all|no frames are keyframes" settings.
|
||||
1c3190b6 Merge "Fix "all|no frames are keyframes" settings."
|
||||
f4dc56fd disable GradientUnfilter_NEON
|
||||
4f3e3bbd disable GradientUnfilter_NEON
|
||||
2dc0bdca Fix "all|no frames are keyframes" settings.
|
||||
0d8e0588 img2webp: treat -loop as a no-op w/single images
|
||||
b0450139 ReadImage(): restore size reporting
|
||||
0ad3b4ef update ChangeLog (tag: v0.6.0-rc2)
|
||||
|
16
Makefile.vc
16
Makefile.vc
@ -29,7 +29,7 @@ PLATFORM_LDFLAGS = /SAFESEH
|
||||
NOLOGO = /nologo
|
||||
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
|
||||
CCDEBUG = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
|
||||
CFLAGS = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
|
||||
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c
|
||||
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
|
||||
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
|
||||
LDFLAGS = $(LDFLAGS) $(PLATFORM_LDFLAGS)
|
||||
@ -155,7 +155,6 @@ CFGSET = TRUE
|
||||
!MESSAGE - all - build (de)mux-based targets for CFG
|
||||
!MESSAGE - gif2webp - requires libgif & >= VS2013
|
||||
!MESSAGE - anim_diff - requires libgif & >= VS2013
|
||||
!MESSAGE - anim_dump
|
||||
!MESSAGE
|
||||
!MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
|
||||
!MESSAGE 'legacy' will produce a Windows 2000 compatible library.
|
||||
@ -234,6 +233,9 @@ DSP_DEC_OBJS = \
|
||||
$(DIROBJ)\dsp\yuv_sse2.obj \
|
||||
|
||||
DSP_ENC_OBJS = \
|
||||
$(DIROBJ)\dsp\argb.obj \
|
||||
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
|
||||
$(DIROBJ)\dsp\argb_sse2.obj \
|
||||
$(DIROBJ)\dsp\cost.obj \
|
||||
$(DIROBJ)\dsp\cost_mips32.obj \
|
||||
$(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
|
||||
@ -356,15 +358,10 @@ all: ex $(EXTRA_EXAMPLES)
|
||||
# C99 support which is only available from VS2013 onward.
|
||||
gif2webp: $(DIRBIN)\gif2webp.exe
|
||||
anim_diff: $(DIRBIN)\anim_diff.exe
|
||||
anim_dump: $(DIRBIN)\anim_dump.exe
|
||||
|
||||
$(DIRBIN)\anim_diff.exe: $(DIROBJ)\examples\anim_diff.obj $(EX_ANIM_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_diff.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_diff.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\anim_dump.exe: $(DIROBJ)\examples\anim_dump.obj $(EX_ANIM_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_dump.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_dump.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\anim_dump.exe: $(IMAGEIO_ENC_OBJS)
|
||||
$(DIRBIN)\cwebp.exe: $(DIROBJ)\examples\cwebp.obj $(IMAGEIO_DEC_OBJS)
|
||||
$(DIRBIN)\cwebp.exe: $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\dwebp.exe: $(DIROBJ)\examples\dwebp.obj $(IMAGEIO_DEC_OBJS)
|
||||
@ -447,7 +444,7 @@ $(OUTPUT_DIRS):
|
||||
$(DIROBJ)\$(DLLINC):
|
||||
@echo #ifndef WEBP_DLL_H_ > $@
|
||||
@echo #define WEBP_DLL_H_ >> $@
|
||||
@echo #define WEBP_EXTERN __declspec(dllexport) >> $@
|
||||
@echo #define WEBP_EXTERN(type) __declspec(dllexport) type >> $@
|
||||
@echo #endif /* WEBP_DLL_H_ */ >> $@
|
||||
|
||||
.SUFFIXES: .c .obj .res .exe
|
||||
@ -459,9 +456,6 @@ $(DIROBJ)\dsp\enc_avx2.obj: src\dsp\enc_avx2.c
|
||||
$(DIROBJ)\examples\anim_diff.obj: examples\anim_diff.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
$(DIROBJ)\examples\anim_dump.obj: examples\anim_dump.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
$(DIROBJ)\examples\anim_util.obj: examples\anim_util.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
|
13
NEWS
13
NEWS
@ -1,16 +1,3 @@
|
||||
- 11/24/2017: version 0.6.1
|
||||
This is a binary compatible release.
|
||||
* lossless performance and compression improvements + a new 'cruncher' mode
|
||||
(-m 6 -q 100)
|
||||
* ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
|
||||
* webp-js: emscripten/webassembly based javascript decoder
|
||||
* miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
|
||||
#363)
|
||||
Tool updates / additions:
|
||||
added webpinfo - prints file format information (issue #330)
|
||||
gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
|
||||
'-loop_compatibility' can be used for the old behavior
|
||||
|
||||
- 1/26/2017: version 0.6.0
|
||||
* lossless performance and compression improvements
|
||||
* miscellaneous performance improvements (SSE2, NEON, MSA)
|
||||
|
25
README
25
README
@ -4,7 +4,7 @@
|
||||
\__\__/\____/\_____/__/ ____ ___
|
||||
/ _/ / \ \ / _ \/ _/
|
||||
/ \_/ / / \ \ __/ \__
|
||||
\____/____/\_____/_____/____/v0.6.1
|
||||
\____/____/\_____/_____/____/v0.6.0
|
||||
|
||||
Description:
|
||||
============
|
||||
@ -113,8 +113,8 @@ make install
|
||||
|
||||
CMake:
|
||||
------
|
||||
With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp, webpinfo
|
||||
and the JS bindings.
|
||||
With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp and the
|
||||
JS bindings.
|
||||
|
||||
Prerequisites:
|
||||
A compiler (e.g., gcc with autotools) and CMake.
|
||||
@ -367,23 +367,6 @@ Use following options to convert into alternate image formats:
|
||||
-quiet ....... quiet mode, don't print anything
|
||||
-noasm ....... disable all assembly optimizations
|
||||
|
||||
WebP file analysis tool:
|
||||
========================
|
||||
|
||||
'webpinfo' can be used to print out the chunk level structure and bitstream
|
||||
header information of WebP files. It can also check if the files are of valid
|
||||
WebP format.
|
||||
|
||||
Usage: webpinfo [options] in_files
|
||||
Note: there could be multiple input files;
|
||||
options must come before input files.
|
||||
Options:
|
||||
-version ........... Print version number and exit.
|
||||
-quiet ............. Do not show chunk parsing information.
|
||||
-diag .............. Show parsing error diagnosis.
|
||||
-summary ........... Show chunk stats summary.
|
||||
-bitstream_info .... Parse bitstream header.
|
||||
|
||||
Visualization tool:
|
||||
===================
|
||||
|
||||
@ -494,8 +477,6 @@ Options:
|
||||
-metadata <string> ..... comma separated list of metadata to
|
||||
copy from the input to the output if present
|
||||
Valid values: all, none, icc, xmp (default)
|
||||
-loop_compatibility .... use compatibility mode for Chrome
|
||||
version prior to M62 (inclusive)
|
||||
-mt .................... use multi-threading if available
|
||||
|
||||
-version ............... print version number and exit
|
||||
|
@ -1,7 +1,7 @@
|
||||
__ __ ____ ____ ____ __ __ _ __ __
|
||||
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
|
||||
\ / __/ _ \ __/ / / (_/ /__
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.1
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.0
|
||||
|
||||
|
||||
Description:
|
||||
|
91
README.wasm
Normal file
91
README.wasm
Normal file
@ -0,0 +1,91 @@
|
||||
Description:
|
||||
============
|
||||
|
||||
This file describes the compilation of libwebp using portable intrinsics /
|
||||
WebAssembly (wasm) to native targets using clang and CMake.
|
||||
|
||||
Prerequisites:
|
||||
==============
|
||||
|
||||
- cmake 2.8+
|
||||
|
||||
- clang 3.9+ for portable intrinsics support; as wasm progresses, a tip-of-tree
|
||||
build may be necessary.
|
||||
|
||||
Building:
|
||||
=========
|
||||
|
||||
- configure the project with CMake using:
|
||||
|
||||
$ mkdir -p build && \
|
||||
cd build && \
|
||||
cmake -DWEBP_BUILD_DWEBP=1 -DCMAKE_C_COMPILER=clang -DWEBP_ENABLE_WASM=1 ../
|
||||
|
||||
- compile dwebp using 'make'.
|
||||
|
||||
- Note this currently generates native executables only and is incompatible
|
||||
with -DWEBP_BUILD_WEBP_JS.
|
||||
|
||||
Build options:
|
||||
==============
|
||||
|
||||
- platform specific multiply high (mulhi) implementation, disabled by default.
|
||||
arm: -DCMAKE_C_FLAGS='-DENABLE_NEON_BUILTIN_MULHI_INT16X8 ...'
|
||||
x86: -DCMAKE_C_FLAGS='-DENABLE_X86_BUILTIN_MULHI_INT16X8 ...'
|
||||
|
||||
Cross compilation:
|
||||
==================
|
||||
|
||||
- arm toolchains can be obtained from:
|
||||
http://www.linaro.org/downloads/
|
||||
|
||||
- the android ndk can be obtained from:
|
||||
https://developer.android.com/ndk/downloads/index.html
|
||||
|
||||
armv7:
|
||||
------
|
||||
|
||||
Android:
|
||||
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
|
||||
--arch arm --api 24 --stl gnustl --install-dir /opt/android-arm-24
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp \
|
||||
-DWEBP_BUILD_DWEBP=1 \
|
||||
-DCMAKE_C_COMPILER=/opt/android-arm-24/bin/clang \
|
||||
-DCMAKE_PREFIX_PATH=/opt/android-arm-24/sysroot/usr/lib \
|
||||
-DCMAKE_C_FLAGS=-fPIE \
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DWEBP_ENABLE_WASM=1
|
||||
|
||||
Linux:
|
||||
$ gcc_arm=/opt/gcc-arm; target=arm-linux-gnueabihf
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc -march=armv7-a -mfpu=neon" \
|
||||
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr
|
||||
|
||||
aarch64 / arm64:
|
||||
----------------
|
||||
|
||||
Android:
|
||||
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
|
||||
--arch arm64 --api 24 --stl gnustl --install-dir /opt/android-arm64-24
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp \
|
||||
-DWEBP_BUILD_DWEBP=1 \
|
||||
-DCMAKE_C_COMPILER=/opt/android-arm64-24/bin/clang \
|
||||
-DCMAKE_PREFIX_PATH=/opt/android-arm64-24/sysroot/usr/lib \
|
||||
-DCMAKE_C_FLAGS=-fPIE \
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DWEBP_ENABLE_WASM=1
|
||||
|
||||
Linux:
|
||||
$ gcc_arm=/opt/gcc-aarch64; target=aarch64-linux-gnu
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc" \
|
||||
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr
|
@ -31,6 +31,11 @@ using Emscripten and CMake.
|
||||
- that's it! Upon completion, you should have the webp.js and
|
||||
webp.js.mem files generated.
|
||||
|
||||
- Note this generates both webp_js and webp_wasm without any SIMD enabled due
|
||||
to bugs with this toolchain associated with the SSE2 code.
|
||||
-DWEBP_ENABLE_WASM is currently meant to generate native (x86, arm)
|
||||
executables (dwebp, cwebp) and is incompatible with -DWEBP_BUILD_WEBP_JS.
|
||||
|
||||
The callable JavaScript function is WebPToSDL(), which decodes a raw WebP
|
||||
bitstream into a canvas. See webp_js/index.html for a simple usage sample.
|
||||
|
||||
|
@ -82,14 +82,12 @@ model {
|
||||
}
|
||||
}
|
||||
// Check for NEON usage.
|
||||
if (getTargetPlatform() == "arm") {
|
||||
if (getTargetPlatform() == "arm" || getTargetPlatform() == "arm64") {
|
||||
NEON = "c.neon"
|
||||
cCompiler.define "HAVE_CPU_FEATURES_H"
|
||||
} else {
|
||||
NEON = "c"
|
||||
}
|
||||
|
||||
cCompiler.args "-I" + file(".").absolutePath
|
||||
}
|
||||
// Link to pthread for shared libraries.
|
||||
withType(SharedLibraryBinarySpec) {
|
||||
@ -122,6 +120,9 @@ model {
|
||||
include "alpha_processing_neon.$NEON"
|
||||
include "alpha_processing_sse2.c"
|
||||
include "alpha_processing_sse41.c"
|
||||
include "argb.c"
|
||||
include "argb_mips_dsp_r2.c"
|
||||
include "argb_sse2.c"
|
||||
include "cpu.c"
|
||||
include "dec.c"
|
||||
include "dec_clip_tables.c"
|
||||
|
@ -1,19 +1,6 @@
|
||||
set(WebP_VERSION @PROJECT_VERSION@)
|
||||
set(WEBP_VERSION ${WebP_VERSION})
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
if(@WEBP_USE_THREAD@)
|
||||
include(CMakeFindDependencyMacro)
|
||||
find_dependency(Threads REQUIRED)
|
||||
endif()
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/WebPTargets.cmake")
|
||||
|
||||
set_and_check(WebP_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
|
||||
set(WebP_INCLUDE_DIRS ${WebP_INCLUDE_DIR})
|
||||
set(WEBP_INCLUDE_DIRS ${WebP_INCLUDE_DIR})
|
||||
set(WebP_INCLUDE_DIRS "webp")
|
||||
set(WEBP_INCLUDE_DIRS ${WebP_INCLUDE_DIRS})
|
||||
set(WebP_LIBRARIES "@INSTALLED_LIBRARIES@")
|
||||
set(WEBP_LIBRARIES "${WebP_LIBRARIES}")
|
||||
|
||||
check_required_components(WebP)
|
||||
|
@ -70,43 +70,18 @@ foreach(I_LIB PNG JPEG TIFF)
|
||||
set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
|
||||
if(${I_LIB}_FOUND)
|
||||
list(APPEND WEBP_DEP_IMG_LIBRARIES ${${I_LIB}_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS
|
||||
${${I_LIB}_INCLUDE_DIR} ${${I_LIB}_INCLUDE_DIRS})
|
||||
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS ${${I_LIB}_INCLUDE_DIRS})
|
||||
endif()
|
||||
endforeach()
|
||||
if(WEBP_DEP_IMG_INCLUDE_DIRS)
|
||||
list(REMOVE_DUPLICATES WEBP_DEP_IMG_INCLUDE_DIRS)
|
||||
endif()
|
||||
|
||||
# GIF detection, gifdec isn't part of the imageio lib.
|
||||
include(CMakePushCheckState)
|
||||
set(WEBP_DEP_GIF_LIBRARIES)
|
||||
set(WEBP_DEP_GIF_INCLUDE_DIRS)
|
||||
find_package(GIF)
|
||||
set(WEBP_HAVE_GIF ${GIF_FOUND})
|
||||
if(GIF_FOUND)
|
||||
# GIF find_package only locates the header and library; it doesn't fail
|
||||
# compile tests when detecting the version, but falls back to 3 (as of at
|
||||
# least cmake 3.7.2). Make sure the library links to avoid incorrect
|
||||
# detection when cross compiling.
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${GIF_LIBRARIES})
|
||||
set(CMAKE_REQUIRED_INCLUDES ${GIF_INCLUDE_DIR})
|
||||
check_c_source_compiles("
|
||||
#include <gif_lib.h>
|
||||
int main(void) {
|
||||
(void)DGifOpenFileHandle;
|
||||
return 0;
|
||||
}
|
||||
" GIF_COMPILES
|
||||
)
|
||||
cmake_pop_check_state()
|
||||
if(GIF_COMPILES)
|
||||
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
|
||||
else()
|
||||
unset(GIF_FOUND)
|
||||
endif()
|
||||
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
## Check for specific headers.
|
||||
@ -164,3 +139,13 @@ strip_bracket(PACKAGE_URL)
|
||||
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
|
||||
set(PACKAGE_TARNAME ${PACKAGE_NAME})
|
||||
set(VERSION ${PACKAGE_VERSION})
|
||||
|
||||
## Generate the config.h header.
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/include/webp/config.h)
|
||||
add_definitions(-DHAVE_CONFIG_H)
|
||||
# The webp folder is included as we reference config.h as
|
||||
# ../webp/config.h or webp/config.h
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include
|
||||
${CMAKE_CURRENT_BINARY_DIR}/include/webp
|
||||
)
|
@ -13,9 +13,6 @@
|
||||
/* Set to 1 if __builtin_bswap64 is available */
|
||||
#cmakedefine HAVE_BUILTIN_BSWAP64 1
|
||||
|
||||
/* Define to 1 if you have the <cpu-features.h> header file. */
|
||||
#cmakedefine HAVE_CPU_FEATURES_H 1
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#cmakedefine HAVE_DLFCN_H 1
|
||||
|
||||
@ -118,19 +115,9 @@
|
||||
/* Set to 1 if JPEG library is installed */
|
||||
#cmakedefine WEBP_HAVE_JPEG 1
|
||||
|
||||
/* Set to 1 if NEON is supported */
|
||||
#cmakedefine WEBP_HAVE_NEON
|
||||
|
||||
/* Set to 1 if runtime detection of NEON is enabled */
|
||||
/* TODO: handle properly in CMake */
|
||||
#cmakedefine WEBP_HAVE_NEON_RTCD
|
||||
|
||||
/* Set to 1 if PNG library is installed */
|
||||
#cmakedefine WEBP_HAVE_PNG 1
|
||||
|
||||
/* Set to 1 if SDL library is installed */
|
||||
#cmakedefine WEBP_HAVE_SDL 1
|
||||
|
||||
/* Set to 1 if SSE2 is supported */
|
||||
#cmakedefine WEBP_HAVE_SSE2 1
|
||||
|
||||
@ -140,9 +127,6 @@
|
||||
/* Set to 1 if TIFF library is installed */
|
||||
#cmakedefine WEBP_HAVE_TIFF 1
|
||||
|
||||
/* Enable near lossless encoding */
|
||||
#cmakedefine WEBP_NEAR_LOSSLESS 1
|
||||
|
||||
/* Undefine this to disable thread support. */
|
||||
#cmakedefine WEBP_USE_THREAD 1
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
## Check for SIMD extensions.
|
||||
include(CMakePushCheckState)
|
||||
|
||||
function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
if(NOT ENABLE_SIMD)
|
||||
@ -8,8 +7,6 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
return()
|
||||
endif()
|
||||
unset(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} CACHE)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
check_c_source_compiles("
|
||||
#include \"${CMAKE_CURRENT_LIST_DIR}/../src/dsp/dsp.h\"
|
||||
int main(void) {
|
||||
@ -20,7 +17,6 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
}
|
||||
" WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG}
|
||||
)
|
||||
cmake_pop_check_state()
|
||||
if(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG})
|
||||
set(WEBP_HAVE_${WEBP_SIMD_FLAG} 1 PARENT_SCOPE)
|
||||
else()
|
||||
@ -64,7 +60,6 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
# First try with no extra flag added as the compiler might have default flags
|
||||
# (especially on Android).
|
||||
unset(WEBP_HAVE_${WEBP_SIMD_FLAG} CACHE)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
|
||||
if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
|
||||
@ -90,8 +85,11 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
foreach(FILE ${SIMD_FILES})
|
||||
list(APPEND WEBP_SIMD_FILES_NOT_TO_INCLUDE ${FILE})
|
||||
endforeach()
|
||||
# Explicitly disable SIMD.
|
||||
if(SIMD_DISABLE_FLAGS)
|
||||
# Explicitly disable SIMD. Skip this with WASM to avoid an ICE with clang:
|
||||
# https://bugs.chromium.org/p/webp/issues/detail?id=350
|
||||
# WASM overrides the native SIMD so building it in is harmless aside from
|
||||
# binary size.
|
||||
if(NOT WEBP_ENABLE_WASM AND SIMD_DISABLE_FLAGS)
|
||||
list(GET SIMD_DISABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
|
||||
include(CheckCCompilerFlag)
|
||||
if(SIMD_COMPILE_FLAG)
|
||||
@ -106,12 +104,11 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
set(COMMON_PATTERNS)
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_DEFINITIONS ${SIMD_COMPILE_FLAG})
|
||||
check_c_source_compiles("int main(void) {return 0;}"
|
||||
FLAG_${SIMD_COMPILE_FLAG}
|
||||
check_c_source_compiles("int main(void) {return 0;}" FLAG2
|
||||
FAIL_REGEX "warning: argument unused during compilation:"
|
||||
${COMMON_PATTERNS}
|
||||
)
|
||||
if(NOT FLAG_${SIMD_COMPILE_FLAG})
|
||||
if(NOT FLAG2)
|
||||
unset(HAS_COMPILE_FLAG CACHE)
|
||||
endif()
|
||||
endif()
|
||||
@ -121,5 +118,14 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
endforeach()
|
||||
|
||||
## Add *_wasm.c files if enabled.
|
||||
if(WEBP_ENABLE_WASM)
|
||||
file(GLOB SIMD_FILES "${CMAKE_CURRENT_LIST_DIR}/../"
|
||||
"src/dsp/*_wasm.c"
|
||||
)
|
||||
foreach(FILE ${SIMD_FILES})
|
||||
list(APPEND WEBP_SIMD_FILES_TO_INCLUDE ${FILE})
|
||||
endforeach()
|
||||
endif()
|
||||
|
38
configure.ac
38
configure.ac
@ -1,4 +1,4 @@
|
||||
AC_INIT([libwebp], [0.6.1],
|
||||
AC_INIT([libwebp], [0.6.0],
|
||||
[https://bugs.chromium.org/p/webp],,
|
||||
[http://developers.google.com/speed/webp])
|
||||
AC_CANONICAL_HOST
|
||||
@ -79,7 +79,6 @@ TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wparentheses-equality])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshadow])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshorten-64-to-32])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wundef])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunreachable-code])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused-but-set-variable])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused])
|
||||
@ -445,12 +444,12 @@ AS_IF([test "x$enable_sdl" != "xno"], [
|
||||
CLEAR_LIBVARS([SDL])
|
||||
WITHLIB_OPTION([sdl], [SDL])
|
||||
|
||||
sdl_header="no"
|
||||
$sdl_header = "no";
|
||||
LIBCHECK_PROLOGUE([SDL])
|
||||
AC_CHECK_HEADER([SDL/SDL.h], [sdl_header="SDL_SDL.h"],
|
||||
[AC_CHECK_HEADER([SDL.h], [sdl_header="SDL.h"],
|
||||
[AC_MSG_WARN(SDL library not available - no sdl.h)])])
|
||||
if test x"$sdl_header" != "xno"; then
|
||||
if test x"$sdl_header" != "xno" ; then
|
||||
AC_CHECK_LIB(SDL, SDL_Init,
|
||||
[SDL_LIBS="-lSDL"
|
||||
SDL_INCLUDES="-DWEBP_HAVE_SDL"
|
||||
@ -459,14 +458,14 @@ AS_IF([test "x$enable_sdl" != "xno"], [
|
||||
sdl_support=yes
|
||||
],
|
||||
AC_MSG_WARN(Optional SDL library not found),
|
||||
[$MATH_LIBS])
|
||||
if test x"$sdl_header" = "xSDL.h"; then
|
||||
[$MATH_LIBS]),
|
||||
if test x"$sdl_header" == "xSDL.h" ; then
|
||||
SDL_INCLUDES="$SDL_INCLUDES -DWEBP_HAVE_JUST_SDL_H"
|
||||
fi
|
||||
fi
|
||||
LIBCHECK_EPILOGUE([SDL])
|
||||
|
||||
if test "$sdl_support" = "yes"; then
|
||||
if test "$sdl_support" = "yes" ; then
|
||||
build_vwebp_sdl=yes
|
||||
fi
|
||||
])
|
||||
@ -590,7 +589,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
|
||||
|
||||
if test "$gif_support" = "yes" -a \
|
||||
"$enable_libwebpdemux" = "yes"; then
|
||||
build_anim_diff=yes
|
||||
build_animdiff=yes
|
||||
fi
|
||||
|
||||
if test "$gif_support" = "yes" -a \
|
||||
@ -598,7 +597,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
|
||||
build_gif2webp=yes
|
||||
fi
|
||||
])
|
||||
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_anim_diff}" = "yes"])
|
||||
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_animdiff}" = "yes"])
|
||||
AM_CONDITIONAL([BUILD_GIF2WEBP], [test "${build_gif2webp}" = "yes"])
|
||||
|
||||
if test "$enable_libwebpmux" = "yes"; then
|
||||
@ -663,7 +662,7 @@ if test "$enable_wic" = "yes"; then
|
||||
fi
|
||||
esac
|
||||
|
||||
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1
|
||||
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
|
||||
|
||||
USE_SWAP_16BIT_CSP=""
|
||||
AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
|
||||
@ -671,7 +670,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
|
||||
AS_HELP_STRING([--enable-swap-16bit-csp],
|
||||
[Enable byte swap for 16 bit colorspaces]))
|
||||
if test "$enable_swap_16bit_csp" = "yes"; then
|
||||
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
|
||||
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
|
||||
fi
|
||||
AC_MSG_RESULT(${enable_swap_16bit_csp-no})
|
||||
AC_SUBST(USE_SWAP_16BIT_CSP)
|
||||
@ -689,21 +688,6 @@ fi
|
||||
AC_MSG_RESULT(${enable_experimental-no})
|
||||
AC_SUBST(USE_EXPERIMENTAL_CODE)
|
||||
|
||||
dnl === If --disable-near-lossless is defined, add -DWEBP_NEAR_LOSSLESS=0
|
||||
|
||||
AC_DEFINE(WEBP_NEAR_LOSSLESS, [1], [Enable near lossless encoding])
|
||||
AC_MSG_CHECKING(if --disable-near-lossless option is specified)
|
||||
AC_ARG_ENABLE([near_lossless],
|
||||
AS_HELP_STRING([--disable-near-lossless],
|
||||
[Disable near lossless encoding]),
|
||||
[], [enable_near_lossless=yes])
|
||||
if test "$enable_near_lossless" = "no"; then
|
||||
AC_DEFINE(WEBP_NEAR_LOSSLESS, [0], [Enable near lossless encoding])
|
||||
AC_MSG_RESULT([yes])
|
||||
else
|
||||
AC_MSG_RESULT([no])
|
||||
fi
|
||||
|
||||
dnl === Check whether libwebpmux should be built
|
||||
AC_MSG_CHECKING(whether libwebpmux is to be built)
|
||||
AC_ARG_ENABLE([libwebpmux],
|
||||
@ -778,7 +762,7 @@ dwebp : yes
|
||||
PNG : ${png_support-no}
|
||||
WIC : ${wic_support-no}
|
||||
GIF support : ${gif_support-no}
|
||||
anim_diff : ${build_anim_diff-no}
|
||||
anim_diff : ${build_animdiff-no}
|
||||
gif2webp : ${build_gif2webp-no}
|
||||
img2webp : ${build_img2webp-no}
|
||||
webpmux : ${enable_libwebpmux-no}
|
||||
|
@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
|
||||
bin_PROGRAMS = dwebp cwebp
|
||||
if BUILD_ANIMDIFF
|
||||
noinst_PROGRAMS = anim_diff anim_dump
|
||||
noinst_PROGRAMS = anim_diff
|
||||
endif
|
||||
if BUILD_GIF2WEBP
|
||||
bin_PROGRAMS += gif2webp
|
||||
@ -27,36 +27,20 @@ libexample_util_la_LIBADD = ../src/libwebp.la
|
||||
|
||||
anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
|
||||
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
|
||||
anim_diff_LDADD =
|
||||
anim_diff_LDADD += ../src/demux/libwebpdemux.la
|
||||
anim_diff_LDADD += libexample_util.la
|
||||
anim_diff_LDADD += ../imageio/libimageio_util.la
|
||||
anim_diff_LDADD = ../src/demux/libwebpdemux.la
|
||||
anim_diff_LDADD += libexample_util.la ../imageio/libimageio_util.la
|
||||
anim_diff_LDADD += $(GIF_LIBS) -lm
|
||||
|
||||
anim_dump_SOURCES = anim_dump.c anim_util.c anim_util.h
|
||||
anim_dump_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(PNG_INCLUDES)
|
||||
anim_dump_CPPFLAGS += $(GIF_INCLUDES)
|
||||
anim_dump_LDADD =
|
||||
anim_dump_LDADD += ../src/demux/libwebpdemux.la
|
||||
anim_dump_LDADD += libexample_util.la
|
||||
anim_dump_LDADD += ../imageio/libimageio_util.la
|
||||
anim_dump_LDADD += ../imageio/libimageenc.la
|
||||
anim_dump_LDADD += $(PNG_LIBS) $(GIF_LIBS) $(TIFF_LIBS) -lm
|
||||
|
||||
cwebp_SOURCES = cwebp.c stopwatch.h
|
||||
cwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
cwebp_LDADD =
|
||||
cwebp_LDADD += libexample_util.la
|
||||
cwebp_LDADD += ../imageio/libimageio_util.la
|
||||
cwebp_LDADD += ../imageio/libimagedec.la
|
||||
cwebp_LDADD += ../src/libwebp.la
|
||||
cwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
cwebp_LDADD += ../imageio/libimagedec.la ../src/libwebp.la
|
||||
cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
dwebp_SOURCES = dwebp.c stopwatch.h
|
||||
dwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
|
||||
dwebp_LDADD =
|
||||
dwebp_LDADD += libexample_util.la
|
||||
dwebp_LDADD = libexample_util.la
|
||||
dwebp_LDADD += ../imageio/libimagedec.la
|
||||
dwebp_LDADD += ../imageio/libimageenc.la
|
||||
dwebp_LDADD += ../imageio/libimageio_util.la
|
||||
@ -65,52 +49,35 @@ dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)
|
||||
|
||||
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
|
||||
gif2webp_LDADD =
|
||||
gif2webp_LDADD += libexample_util.la
|
||||
gif2webp_LDADD += ../imageio/libimageio_util.la
|
||||
gif2webp_LDADD += ../src/mux/libwebpmux.la
|
||||
gif2webp_LDADD += ../src/libwebp.la
|
||||
gif2webp_LDADD += $(GIF_LIBS)
|
||||
gif2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
gif2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la $(GIF_LIBS)
|
||||
|
||||
vwebp_SOURCES = vwebp.c
|
||||
vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
|
||||
vwebp_LDADD =
|
||||
vwebp_LDADD += libexample_util.la
|
||||
vwebp_LDADD += ../imageio/libimageio_util.la
|
||||
vwebp_LDADD += ../src/demux/libwebpdemux.la
|
||||
vwebp_LDADD += $(GL_LIBS)
|
||||
vwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
vwebp_LDADD += ../src/demux/libwebpdemux.la $(GL_LIBS)
|
||||
|
||||
webpmux_SOURCES = webpmux.c
|
||||
webpmux_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webpmux_LDADD =
|
||||
webpmux_LDADD += libexample_util.la
|
||||
webpmux_LDADD += ../imageio/libimageio_util.la
|
||||
webpmux_LDADD += ../src/mux/libwebpmux.la
|
||||
webpmux_LDADD += ../src/libwebp.la
|
||||
webpmux_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
webpmux_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
|
||||
|
||||
img2webp_SOURCES = img2webp.c
|
||||
img2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
img2webp_LDADD =
|
||||
img2webp_LDADD += libexample_util.la
|
||||
img2webp_LDADD += ../imageio/libimageio_util.la
|
||||
img2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
img2webp_LDADD += ../imageio/libimagedec.la
|
||||
img2webp_LDADD += ../src/mux/libwebpmux.la
|
||||
img2webp_LDADD += ../src/libwebp.la
|
||||
img2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
|
||||
img2webp_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
webpinfo_SOURCES = webpinfo.c
|
||||
webpinfo_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webpinfo_LDADD =
|
||||
webpinfo_LDADD += libexample_util.la
|
||||
webpinfo_LDADD += ../imageio/libimageio_util.la
|
||||
webpinfo_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
webpinfo_LDADD += ../src/libwebp.la
|
||||
|
||||
if BUILD_LIBWEBPDECODER
|
||||
anim_diff_LDADD += ../src/libwebpdecoder.la
|
||||
anim_dump_LDADD += ../src/libwebpdecoder.la
|
||||
vwebp_LDADD += ../src/libwebpdecoder.la
|
||||
else
|
||||
anim_diff_LDADD += ../src/libwebp.la
|
||||
anim_dump_LDADD += ../src/libwebp.la
|
||||
vwebp_LDADD += ../src/libwebp.la
|
||||
endif
|
||||
|
@ -143,18 +143,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
|
||||
if (!ok) return 0; // These are fatal failures, can't proceed.
|
||||
|
||||
if (is_multi_frame_image) { // Checks relevant for multi-frame images only.
|
||||
int max_loop_count_workaround = 0;
|
||||
// Transcodes to webp increase the gif loop count by 1 for compatibility.
|
||||
// When the gif has the maximum value the webp value will be off by one.
|
||||
if ((img1->format == ANIM_GIF && img1->loop_count == 65536 &&
|
||||
img2->format == ANIM_WEBP && img2->loop_count == 65535) ||
|
||||
(img1->format == ANIM_WEBP && img1->loop_count == 65535 &&
|
||||
img2->format == ANIM_GIF && img2->loop_count == 65536)) {
|
||||
max_loop_count_workaround = 1;
|
||||
}
|
||||
ok = (max_loop_count_workaround ||
|
||||
CompareValues(img1->loop_count, img2->loop_count,
|
||||
"Loop count mismatch")) && ok;
|
||||
ok = CompareValues(img1->loop_count, img2->loop_count,
|
||||
"Loop count mismatch") && ok;
|
||||
ok = CompareBackgroundColor(img1->bgcolor, img2->bgcolor,
|
||||
premultiply) && ok;
|
||||
}
|
||||
|
@ -1,104 +0,0 @@
|
||||
// Copyright 2017 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Decodes an animated WebP file and dumps the decoded frames as PNG or TIFF.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h> // for 'strcmp'.
|
||||
|
||||
#include "./anim_util.h"
|
||||
#include "webp/decode.h"
|
||||
#include "../imageio/image_enc.h"
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
static void Help(void) {
|
||||
printf("Usage: anim_dump [options] files...\n");
|
||||
printf("\nOptions:\n");
|
||||
printf(" -folder <string> .... dump folder (default: '.')\n");
|
||||
printf(" -prefix <string> .... prefix for dumped frames "
|
||||
"(default: 'dump_')\n");
|
||||
printf(" -tiff ............... save frames as TIFF\n");
|
||||
printf(" -pam ................ save frames as PAM\n");
|
||||
}
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
int error = 0;
|
||||
const char* dump_folder = ".";
|
||||
const char* prefix = "dump_";
|
||||
const char* suffix = "png";
|
||||
WebPOutputFileFormat format = PNG;
|
||||
int c;
|
||||
|
||||
if (argc < 2) {
|
||||
Help();
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (c = 1; !error && c < argc; ++c) {
|
||||
if (!strcmp(argv[c], "-folder")) {
|
||||
if (c + 1 == argc) {
|
||||
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
dump_folder = argv[++c];
|
||||
} else if (!strcmp(argv[c], "-prefix")) {
|
||||
if (c + 1 == argc) {
|
||||
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
prefix = argv[++c];
|
||||
} else if (!strcmp(argv[c], "-tiff")) {
|
||||
format = TIFF;
|
||||
suffix = "tiff";
|
||||
} else if (!strcmp(argv[c], "-pam")) {
|
||||
format = PAM;
|
||||
suffix = "pam";
|
||||
} else {
|
||||
uint32_t i;
|
||||
AnimatedImage image;
|
||||
const char* const file = argv[c];
|
||||
memset(&image, 0, sizeof(image));
|
||||
printf("Decoding file: %s as %s/%sxxxx.%s\n",
|
||||
file, dump_folder, prefix, suffix);
|
||||
if (!ReadAnimatedImage(file, &image, 0, NULL)) {
|
||||
fprintf(stderr, "Error decoding file: %s\n Aborting.\n", file);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
for (i = 0; !error && i < image.num_frames; ++i) {
|
||||
char out_file[1024];
|
||||
WebPDecBuffer buffer;
|
||||
WebPInitDecBuffer(&buffer);
|
||||
buffer.colorspace = MODE_RGBA;
|
||||
buffer.is_external_memory = 1;
|
||||
buffer.width = image.canvas_width;
|
||||
buffer.height = image.canvas_height;
|
||||
buffer.u.RGBA.rgba = image.frames[i].rgba;
|
||||
buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
|
||||
buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
|
||||
snprintf(out_file, sizeof(out_file), "%s/%s%.4d.%s",
|
||||
dump_folder, prefix, i, suffix);
|
||||
if (!WebPSaveImage(&buffer, format, out_file)) {
|
||||
fprintf(stderr, "Error while saving image '%s'\n", out_file);
|
||||
error = 1;
|
||||
}
|
||||
WebPFreeDecBuffer(&buffer);
|
||||
}
|
||||
ClearAnimatedImage(&image);
|
||||
}
|
||||
}
|
||||
return error ? 1 : 0;
|
||||
}
|
@ -16,7 +16,7 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
#ifdef WEBP_HAVE_GIF
|
||||
#include <gif_lib.h>
|
||||
#endif
|
||||
#include "webp/format_constants.h"
|
||||
@ -33,13 +33,11 @@ static const int kNumChannels = 4;
|
||||
// -----------------------------------------------------------------------------
|
||||
// Common utilities.
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
// Returns true if the frame covers the full canvas.
|
||||
static int IsFullFrame(int width, int height,
|
||||
int canvas_width, int canvas_height) {
|
||||
return (width == canvas_width && height == canvas_height);
|
||||
}
|
||||
#endif // WEBP_HAVE_GIF
|
||||
|
||||
static int CheckSizeForOverflow(uint64_t size) {
|
||||
return (size == (size_t)size);
|
||||
@ -87,7 +85,6 @@ void ClearAnimatedImage(AnimatedImage* const image) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
// Clear the canvas to transparent.
|
||||
static void ZeroFillCanvas(uint8_t* rgba,
|
||||
uint32_t canvas_width, uint32_t canvas_height) {
|
||||
@ -129,7 +126,6 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
#endif // WEBP_HAVE_GIF
|
||||
|
||||
// Canonicalize all transparent pixels to transparent black to aid comparison.
|
||||
static void CleanupTransparentPixels(uint32_t* rgba,
|
||||
@ -156,8 +152,6 @@ static int DumpFrame(const char filename[], const char dump_folder[],
|
||||
FILE* f = NULL;
|
||||
const char* row;
|
||||
|
||||
if (dump_folder == NULL) dump_folder = ".";
|
||||
|
||||
base_name = strrchr(filename, '/');
|
||||
base_name = (base_name == NULL) ? filename : base_name + 1;
|
||||
max_len = strlen(dump_folder) + 1 + strlen(base_name)
|
||||
@ -206,7 +200,7 @@ static int IsWebP(const WebPData* const webp_data) {
|
||||
return (WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL) != 0);
|
||||
}
|
||||
|
||||
// Read animated WebP bitstream 'webp_data' into 'AnimatedImage' struct.
|
||||
// Read animated WebP bitstream 'file_str' into 'AnimatedImage' struct.
|
||||
static int ReadAnimatedWebP(const char filename[],
|
||||
const WebPData* const webp_data,
|
||||
AnimatedImage* const image, int dump_frames,
|
||||
@ -275,7 +269,6 @@ static int ReadAnimatedWebP(const char filename[],
|
||||
prev_frame_timestamp = timestamp;
|
||||
}
|
||||
ok = dump_ok;
|
||||
if (ok) image->format = ANIM_WEBP;
|
||||
|
||||
End:
|
||||
WebPAnimDecoderDelete(dec);
|
||||
@ -285,7 +278,7 @@ static int ReadAnimatedWebP(const char filename[],
|
||||
// -----------------------------------------------------------------------------
|
||||
// GIF Decoding.
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
#ifdef WEBP_HAVE_GIF
|
||||
|
||||
// Returns true if this is a valid GIF bitstream.
|
||||
static int IsGIF(const WebPData* const data) {
|
||||
@ -430,11 +423,6 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
|
||||
}
|
||||
|
||||
// Find appropriate app extension and get loop count from the next extension.
|
||||
// We use Chrome's interpretation of the 'loop_count' semantics:
|
||||
// if not present -> loop once
|
||||
// if present and loop_count == 0, return 0 ('infinite').
|
||||
// if present and loop_count != 0, it's the number of *extra* loops
|
||||
// so we need to return loop_count + 1 as total loop number.
|
||||
static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
|
||||
int i;
|
||||
for (i = 0; i < gif->ImageCount; ++i) {
|
||||
@ -452,13 +440,12 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
|
||||
if (signature_is_ok &&
|
||||
eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
|
||||
eb2->Bytes[0] == 1) {
|
||||
const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
|
||||
((uint32_t)(eb2->Bytes[1]) << 0);
|
||||
return (extra_loop > 0) ? extra_loop + 1 : 0;
|
||||
return ((uint32_t)(eb2->Bytes[2]) << 8) +
|
||||
((uint32_t)(eb2->Bytes[1]) << 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1; // Default.
|
||||
return 0; // Default.
|
||||
}
|
||||
|
||||
// Get duration of 'n'th frame in milliseconds.
|
||||
@ -685,7 +672,6 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
|
||||
}
|
||||
}
|
||||
}
|
||||
image->format = ANIM_GIF;
|
||||
DGifCloseFile(gif, NULL);
|
||||
return 1;
|
||||
}
|
||||
|
@ -22,11 +22,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
ANIM_GIF,
|
||||
ANIM_WEBP
|
||||
} AnimatedFileFormat;
|
||||
|
||||
typedef struct {
|
||||
uint8_t* rgba; // Decoded and reconstructed full frame.
|
||||
int duration; // Frame duration in milliseconds.
|
||||
@ -34,7 +29,6 @@ typedef struct {
|
||||
} DecodedFrame;
|
||||
|
||||
typedef struct {
|
||||
AnimatedFileFormat format;
|
||||
uint32_t canvas_width;
|
||||
uint32_t canvas_height;
|
||||
uint32_t bgcolor;
|
||||
|
@ -463,9 +463,8 @@ static int WriteWebPWithMetadata(FILE* const out,
|
||||
} else {
|
||||
const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
|
||||
if (is_lossless) {
|
||||
// Presence of alpha is stored in the 37th bit (29th after the
|
||||
// signature) of VP8L data.
|
||||
if (webp[kChunkHeaderSize + 4] & (1 << 4)) flags |= kAlphaFlag;
|
||||
// Presence of alpha is stored in the 29th bit of VP8L data.
|
||||
if (webp[kChunkHeaderSize + 3] & (1 << 5)) flags |= kAlphaFlag;
|
||||
}
|
||||
ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
|
||||
ok = ok && WriteLE32(out, flags);
|
||||
@ -487,10 +486,10 @@ static int WriteWebPWithMetadata(FILE* const out,
|
||||
*metadata_written |= METADATA_XMP;
|
||||
}
|
||||
return ok;
|
||||
} else {
|
||||
// No metadata, just write the original image file.
|
||||
return (fwrite(webp, webp_size, 1, out) == 1);
|
||||
}
|
||||
|
||||
// No metadata, just write the original image file.
|
||||
return (fwrite(webp, webp_size, 1, out) == 1);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -72,10 +72,8 @@ static void Help(void) {
|
||||
printf(" -metadata <string> ..... comma separated list of metadata to\n");
|
||||
printf(" ");
|
||||
printf("copy from the input to the output if present\n");
|
||||
printf(" ");
|
||||
printf("Valid values: all, none, icc, xmp (default)\n");
|
||||
printf(" -loop_compatibility .... use compatibility mode for Chrome\n");
|
||||
printf(" version prior to M62 (inclusive)\n");
|
||||
printf(" "
|
||||
"Valid values: all, none, icc, xmp (default)\n");
|
||||
printf(" -mt .................... use multi-threading if available\n");
|
||||
printf("\n");
|
||||
printf(" -version ............... print version number and exit\n");
|
||||
@ -106,7 +104,7 @@ int main(int argc, const char *argv[]) {
|
||||
WebPAnimEncoderOptions enc_options;
|
||||
WebPConfig config;
|
||||
|
||||
int frame_number = 0; // Whether we are processing the first frame.
|
||||
int is_first_frame = 1; // Whether we are processing the first frame.
|
||||
int done;
|
||||
int c;
|
||||
int quiet = 0;
|
||||
@ -117,9 +115,8 @@ int main(int argc, const char *argv[]) {
|
||||
int stored_icc = 0; // Whether we have already stored an ICC profile.
|
||||
WebPData xmp_data;
|
||||
int stored_xmp = 0; // Whether we have already stored an XMP profile.
|
||||
int loop_count = 0; // default: infinite
|
||||
int loop_count = 0;
|
||||
int stored_loop_count = 0; // Whether we have found an explicit loop count.
|
||||
int loop_compatibility = 0;
|
||||
WebPMux* mux = NULL;
|
||||
|
||||
int default_kmin = 1; // Whether to use default kmin value.
|
||||
@ -154,8 +151,6 @@ int main(int argc, const char *argv[]) {
|
||||
} else if (!strcmp(argv[c], "-mixed")) {
|
||||
enc_options.allow_mixed = 1;
|
||||
config.lossless = 0;
|
||||
} else if (!strcmp(argv[c], "-loop_compatibility")) {
|
||||
loop_compatibility = 1;
|
||||
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
|
||||
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
|
||||
} else if (!strcmp(argv[c], "-m") && c < argc - 1) {
|
||||
@ -282,7 +277,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
if (!DGifGetImageDesc(gif)) goto End;
|
||||
|
||||
if (frame_number == 0) {
|
||||
if (is_first_frame) {
|
||||
if (verbose) {
|
||||
printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
|
||||
}
|
||||
@ -324,6 +319,7 @@ int main(int argc, const char *argv[]) {
|
||||
"a memory error.\n");
|
||||
goto End;
|
||||
}
|
||||
is_first_frame = 0;
|
||||
}
|
||||
|
||||
// Some even more broken GIF can have sub-rect with zero width/height.
|
||||
@ -340,11 +336,7 @@ int main(int argc, const char *argv[]) {
|
||||
GIFBlendFrames(&frame, &gif_rect, &curr_canvas);
|
||||
|
||||
if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
|
||||
fprintf(stderr, "Error while adding frame #%d: %s\n", frame_number,
|
||||
WebPAnimEncoderGetError(enc));
|
||||
goto End;
|
||||
} else {
|
||||
++frame_number;
|
||||
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
|
||||
}
|
||||
|
||||
// Update canvases.
|
||||
@ -394,7 +386,7 @@ int main(int argc, const char *argv[]) {
|
||||
if (verbose) {
|
||||
fprintf(stderr, "Loop count: %d\n", loop_count);
|
||||
}
|
||||
stored_loop_count = loop_compatibility ? (loop_count != 0) : 1;
|
||||
stored_loop_count = (loop_count != 0);
|
||||
} else { // An extension containing metadata.
|
||||
// We only store the first encountered chunk of each type, and
|
||||
// only if requested by the user.
|
||||
@ -451,23 +443,6 @@ int main(int argc, const char *argv[]) {
|
||||
goto End;
|
||||
}
|
||||
|
||||
if (!loop_compatibility) {
|
||||
if (!stored_loop_count) {
|
||||
// if no loop-count element is seen, the default is '1' (loop-once)
|
||||
// and we need to signal it explicitly in WebP. Note however that
|
||||
// in case there's a single frame, we still don't need to store it.
|
||||
if (frame_number > 1) {
|
||||
stored_loop_count = 1;
|
||||
loop_count = 1;
|
||||
}
|
||||
} else if (loop_count > 0 && loop_count < 65535) {
|
||||
// adapt GIF's semantic to WebP's (except in the infinite-loop case)
|
||||
loop_count += 1;
|
||||
}
|
||||
}
|
||||
// loop_count of 0 is the default (infinite), so no need to signal it
|
||||
if (loop_count == 0) stored_loop_count = 0;
|
||||
|
||||
if (stored_loop_count || stored_icc || stored_xmp) {
|
||||
// Re-mux to add loop count and/or metadata as needed.
|
||||
mux = WebPMuxCreate(&webp_data, 1);
|
||||
|
@ -248,9 +248,9 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
|
||||
}
|
||||
}
|
||||
} else if (key == 'i') {
|
||||
// Note: doesn't handle refresh of animation's last-frame (it's quite
|
||||
// more involved to do, since you need to save the previous frame).
|
||||
kParams.print_info = 1 - kParams.print_info;
|
||||
// TODO(skal): handle refresh of animation's last-frame too. It's quite
|
||||
// more involved though (need to save the previous frame).
|
||||
if (!kParams.has_animation) ClearPreviousFrame();
|
||||
glutPostRedisplay();
|
||||
} else if (key == 'd') {
|
||||
@ -260,8 +260,8 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
|
||||
}
|
||||
|
||||
static void HandleReshape(int width, int height) {
|
||||
// Note: reshape doesn't preserve aspect ratio, and might
|
||||
// be handling larger-than-screen pictures incorrectly.
|
||||
// TODO(skal): should we preserve aspect ratio?
|
||||
// Also: handle larger-than-screen pictures correctly.
|
||||
glViewport(0, 0, width, height);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
@ -378,23 +378,13 @@ static void HandleDisplay(void) {
|
||||
}
|
||||
}
|
||||
glPopMatrix();
|
||||
#if defined(__APPLE__) || defined(_WIN32)
|
||||
glFlush();
|
||||
#else
|
||||
glutSwapBuffers();
|
||||
#endif
|
||||
}
|
||||
|
||||
static void StartDisplay(void) {
|
||||
const int width = kParams.canvas_width;
|
||||
const int height = kParams.canvas_height;
|
||||
// TODO(webp:365) GLUT_DOUBLE results in flickering / old frames to be
|
||||
// partially displayed with animated webp + alpha.
|
||||
#if defined(__APPLE__) || defined(_WIN32)
|
||||
glutInitDisplayMode(GLUT_RGBA);
|
||||
#else
|
||||
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
|
||||
#endif
|
||||
glutInitWindowSize(width, height);
|
||||
glutCreateWindow("WebP viewer");
|
||||
glutDisplayFunc(HandleDisplay);
|
||||
|
@ -233,20 +233,20 @@ static int GetSignedBits(const uint8_t* const data, size_t data_size, size_t nb,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define GET_SIGNED_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetSignedBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define GET_SIGNED_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetSignedBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
|
||||
@ -462,12 +462,12 @@ static int LLGetBits(const uint8_t* const data, size_t data_size, size_t nb,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define LL_GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!LLGetBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossless bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define LL_GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!LLGetBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossless bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
|
||||
@ -817,8 +817,9 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
|
||||
if (webp_info->seen_image_subchunk_) {
|
||||
LOG_ERROR("Consecutive VP8/VP8L sub-chunks in an ANMF chunk.");
|
||||
return WEBP_INFO_PARSE_ERROR;
|
||||
} else {
|
||||
webp_info->seen_image_subchunk_ = 1;
|
||||
}
|
||||
webp_info->seen_image_subchunk_ = 1;
|
||||
} else {
|
||||
if (webp_info->chunk_counts_[CHUNK_VP8] ||
|
||||
webp_info->chunk_counts_[CHUNK_VP8L]) {
|
||||
@ -872,9 +873,9 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
|
||||
if (webp_info->seen_alpha_subchunk_) {
|
||||
LOG_ERROR("Consecutive ALPH sub-chunks in an ANMF chunk.");
|
||||
return WEBP_INFO_PARSE_ERROR;
|
||||
} else {
|
||||
webp_info->seen_alpha_subchunk_ = 1;
|
||||
}
|
||||
webp_info->seen_alpha_subchunk_ = 1;
|
||||
|
||||
if (webp_info->seen_image_subchunk_) {
|
||||
LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
|
||||
"in an ANMF chunk.");
|
||||
@ -1106,7 +1107,6 @@ static void HelpLong(void) {
|
||||
"Note: there could be multiple input files;\n"
|
||||
" options must come before input files.\n"
|
||||
"Options:\n"
|
||||
" -version ........... Print version number and exit.\n"
|
||||
" -quiet ............. Do not show chunk parsing information.\n"
|
||||
" -diag .............. Show parsing error diagnosis.\n"
|
||||
" -summary ........... Show chunk stats summary.\n"
|
||||
@ -1140,11 +1140,6 @@ int main(int argc, const char* argv[]) {
|
||||
show_summary = 1;
|
||||
} else if (!strcmp(argv[c], "-bitstream_info")) {
|
||||
parse_bitstream = 1;
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
const int version = WebPGetDecoderVersion();
|
||||
printf("WebP Decoder version: %d.%d.%d\n",
|
||||
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
|
||||
return 0;
|
||||
} else { // Assume the remaining are all input files.
|
||||
break;
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
noinst_LTLIBRARIES = libwebpextras.la
|
||||
|
||||
@ -20,22 +19,18 @@ endif
|
||||
|
||||
get_disto_SOURCES = get_disto.c
|
||||
get_disto_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
get_disto_LDADD =
|
||||
get_disto_LDADD += ../imageio/libimageio_util.la
|
||||
get_disto_LDADD += ../imageio/libimagedec.la
|
||||
get_disto_LDADD = ../imageio/libimageio_util.la ../imageio/libimagedec.la
|
||||
get_disto_LDADD += ../src/libwebp.la
|
||||
get_disto_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
webp_quality_SOURCES = webp_quality.c
|
||||
webp_quality_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webp_quality_LDADD =
|
||||
webp_quality_LDADD += ../imageio/libimageio_util.la
|
||||
webp_quality_LDADD = ../imageio/libimageio_util.la
|
||||
webp_quality_LDADD += libwebpextras.la
|
||||
webp_quality_LDADD += ../src/libwebp.la
|
||||
|
||||
vwebp_sdl_SOURCES = vwebp_sdl.c webp_to_sdl.c webp_to_sdl.h
|
||||
vwebp_sdl_CPPFLAGS = $(AM_CPPFLAGS) $(SDL_INCLUDES)
|
||||
vwebp_sdl_LDADD =
|
||||
vwebp_sdl_LDADD += ../imageio/libimageio_util.la
|
||||
vwebp_sdl_LDADD = ../imageio/libimageio_util.la
|
||||
vwebp_sdl_LDADD += ../src/libwebp.la
|
||||
vwebp_sdl_LDADD += $(SDL_LIBS)
|
||||
|
@ -10,7 +10,7 @@
|
||||
// Additional WebP utilities.
|
||||
//
|
||||
|
||||
#include "extras/extras.h"
|
||||
#include "./extras.h"
|
||||
#include "webp/format_constants.h"
|
||||
|
||||
#include <assert.h>
|
||||
@ -18,7 +18,7 @@
|
||||
|
||||
#define XTRA_MAJ_VERSION 0
|
||||
#define XTRA_MIN_VERSION 1
|
||||
#define XTRA_REV_VERSION 1
|
||||
#define XTRA_REV_VERSION 0
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -25,28 +25,28 @@ extern "C" {
|
||||
|
||||
// Returns the version number of the extras library, packed in hexadecimal using
|
||||
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN int WebPGetExtrasVersion(void);
|
||||
WEBP_EXTERN(int) WebPGetExtrasVersion(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Ad-hoc colorspace importers.
|
||||
|
||||
// Import luma sample (gray scale image) into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN int WebPImportGray(const uint8_t* gray, WebPPicture* picture);
|
||||
WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);
|
||||
|
||||
// Import rgb sample in RGB565 packed format into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN int WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
|
||||
WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
|
||||
|
||||
// Import rgb sample in RGB4444 packed format into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
|
||||
WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
|
||||
|
||||
// Import a color mapped image. The number of colors is less or equal to
|
||||
// MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
|
||||
// will be discarded. Returns 'false' in case of error, or if indexed[] contains
|
||||
// invalid indices.
|
||||
WEBP_EXTERN int
|
||||
WEBP_EXTERN(int)
|
||||
WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
|
||||
const uint32_t palette[], int palette_size,
|
||||
WebPPicture* pic);
|
||||
@ -59,7 +59,7 @@ WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
|
||||
// Otherwise (lossy bitstream), the returned value is in the range [0..100].
|
||||
// Any error (invalid bitstream, animated WebP, incomplete header, etc.)
|
||||
// will return a value of -1.
|
||||
WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
|
||||
WEBP_EXTERN(int) VP8EstimateQuality(const uint8_t* const data, size_t size);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -24,8 +24,8 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "webp/encode.h"
|
||||
#include "imageio/image_dec.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
#include "../imageio/image_dec.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
|
||||
static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
|
||||
int keep_alpha) {
|
||||
@ -322,7 +322,6 @@ int main(int argc, const char *argv[]) {
|
||||
fprintf(stderr, "Can only compute the difference map in ARGB format.\n");
|
||||
goto End;
|
||||
}
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
data_size = WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb,
|
||||
pic1.width, pic1.height,
|
||||
pic1.argb_stride * 4,
|
||||
@ -334,12 +333,6 @@ int main(int argc, const char *argv[]) {
|
||||
ret = ImgIoUtilWriteFile(output, data, data_size) ? 0 : 1;
|
||||
WebPFree(data);
|
||||
if (ret) goto End;
|
||||
#else
|
||||
(void)data;
|
||||
(void)data_size;
|
||||
fprintf(stderr, "Cannot save the difference map. Please recompile "
|
||||
"without the WEBP_REDUCE_CSP flag.\n");
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "extras/extras.h"
|
||||
#include "./extras.h"
|
||||
#include "webp/decode.h"
|
||||
|
||||
#include <math.h>
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include "webp_to_sdl.h"
|
||||
#include "webp/decode.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
|
||||
#if defined(WEBP_HAVE_JUST_SDL_H)
|
||||
#include <SDL.h>
|
||||
|
@ -11,8 +11,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "extras/extras.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
#include "./extras.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
int c;
|
||||
|
7
extras/webp_to_sdl.c
Normal file → Executable file
7
extras/webp_to_sdl.c
Normal file → Executable file
@ -28,7 +28,6 @@
|
||||
#include <SDL/SDL.h>
|
||||
#endif
|
||||
|
||||
static int init_ok = 0;
|
||||
int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
int ok = 0;
|
||||
VP8StatusCode status;
|
||||
@ -43,10 +42,7 @@ int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!init_ok) {
|
||||
SDL_Init(SDL_INIT_VIDEO);
|
||||
init_ok = 1;
|
||||
}
|
||||
SDL_Init(SDL_INIT_VIDEO);
|
||||
|
||||
status = WebPGetFeatures((uint8_t*)data, (size_t)data_size, &config.input);
|
||||
if (status != VP8_STATUS_OK) goto Error;
|
||||
@ -101,7 +97,6 @@ int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
Error:
|
||||
SDL_FreeSurface(surface);
|
||||
SDL_FreeSurface(screen);
|
||||
WebPFreeDecBuffer(output);
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
@ -1,18 +1,13 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
noinst_LTLIBRARIES =
|
||||
noinst_LTLIBRARIES += libimageio_util.la
|
||||
noinst_LTLIBRARIES += libimagedec.la
|
||||
noinst_LTLIBRARIES += libimageenc.la
|
||||
noinst_LTLIBRARIES = libimageio_util.la libimagedec.la libimageenc.la
|
||||
|
||||
noinst_HEADERS =
|
||||
noinst_HEADERS += ../src/webp/decode.h
|
||||
noinst_HEADERS += ../src/webp/types.h
|
||||
|
||||
libimageio_util_la_SOURCES =
|
||||
libimageio_util_la_SOURCES += imageio_util.c imageio_util.h
|
||||
libimageio_util_la_SOURCES = imageio_util.c imageio_util.h
|
||||
|
||||
libimagedec_la_SOURCES =
|
||||
libimagedec_la_SOURCES += image_dec.c image_dec.h
|
||||
libimagedec_la_SOURCES = image_dec.c image_dec.h
|
||||
libimagedec_la_SOURCES += jpegdec.c jpegdec.h
|
||||
libimagedec_la_SOURCES += metadata.c metadata.h
|
||||
libimagedec_la_SOURCES += pngdec.c pngdec.h
|
||||
@ -23,7 +18,6 @@ libimagedec_la_SOURCES += wicdec.c wicdec.h
|
||||
libimagedec_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
|
||||
libimagedec_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
|
||||
libimageenc_la_SOURCES =
|
||||
libimageenc_la_SOURCES += image_enc.c image_enc.h
|
||||
libimageenc_la_SOURCES = image_enc.c image_enc.h
|
||||
libimageenc_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
|
||||
libimageenc_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
|
@ -542,24 +542,22 @@ int WebPWriteYUV(FILE* fout, const WebPDecBuffer* const buffer) {
|
||||
// Generic top-level call
|
||||
|
||||
int WebPSaveImage(const WebPDecBuffer* const buffer,
|
||||
WebPOutputFileFormat format,
|
||||
const char* const out_file_name) {
|
||||
WebPOutputFileFormat format, const char* const out_file) {
|
||||
FILE* fout = NULL;
|
||||
int needs_open_file = 1;
|
||||
const int use_stdout = (out_file_name != NULL) && !strcmp(out_file_name, "-");
|
||||
const int use_stdout = (out_file != NULL) && !strcmp(out_file, "-");
|
||||
int ok = 1;
|
||||
|
||||
if (buffer == NULL || out_file_name == NULL) return 0;
|
||||
if (buffer == NULL || out_file == NULL) return 0;
|
||||
|
||||
#ifdef HAVE_WINCODEC_H
|
||||
needs_open_file = (format != PNG);
|
||||
#endif
|
||||
|
||||
if (needs_open_file) {
|
||||
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout)
|
||||
: fopen(out_file_name, "wb");
|
||||
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
|
||||
if (fout == NULL) {
|
||||
fprintf(stderr, "Error opening output file %s\n", out_file_name);
|
||||
fprintf(stderr, "Error opening output file %s\n", out_file);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -568,7 +566,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
|
||||
format == RGBA || format == BGRA || format == ARGB ||
|
||||
format == rgbA || format == bgrA || format == Argb) {
|
||||
#ifdef HAVE_WINCODEC_H
|
||||
ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
|
||||
ok &= WebPWritePNG(out_file, use_stdout, buffer);
|
||||
#else
|
||||
ok &= WebPWritePNG(fout, buffer);
|
||||
#endif
|
||||
|
@ -137,11 +137,7 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
|
||||
|
||||
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
|
||||
const uint64_t total_size = nmemb * size;
|
||||
int ok = (total_size == (size_t)total_size);
|
||||
#if defined(WEBP_MAX_IMAGE_SIZE)
|
||||
ok = ok && (total_size <= (uint64_t)WEBP_MAX_IMAGE_SIZE);
|
||||
#endif
|
||||
return ok;
|
||||
return (total_size == (size_t)total_size);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
@ -304,18 +304,18 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
|
||||
|
||||
if (stride != (int)stride ||
|
||||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
|
||||
goto Error;
|
||||
goto End;
|
||||
}
|
||||
|
||||
rgb = (uint8_t*)malloc((size_t)stride * height);
|
||||
if (rgb == NULL) {
|
||||
goto Error;
|
||||
goto End;
|
||||
}
|
||||
buffer[0] = (JSAMPLE*)rgb;
|
||||
|
||||
while (dinfo.output_scanline < dinfo.output_height) {
|
||||
if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
|
||||
goto Error;
|
||||
goto End;
|
||||
}
|
||||
buffer[0] += stride;
|
||||
}
|
||||
|
@ -117,13 +117,8 @@ static size_t ReadPAMFields(PNMInfo* const info, size_t off) {
|
||||
}
|
||||
}
|
||||
if (!(info->seen_flags & TUPLE_FLAG)) {
|
||||
if (info->depth > 0 && info->depth <= 4) {
|
||||
info->seen_flags |= TUPLE_FLAG;
|
||||
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
|
||||
} else {
|
||||
fprintf(stderr, "PAM: invalid bitdepth (%d).\n", info->depth);
|
||||
return 0;
|
||||
}
|
||||
info->seen_flags |= TUPLE_FLAG;
|
||||
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
|
||||
}
|
||||
if (info->seen_flags != ALL_NEEDED_FLAGS) {
|
||||
fprintf(stderr, "PAM: incomplete header.\n");
|
||||
|
@ -9,10 +9,6 @@
|
||||
//
|
||||
// WebP decode.
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "./webpdec.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@ -145,32 +141,17 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
|
||||
|
||||
do {
|
||||
const int has_alpha = keep_alpha && bitstream->has_alpha;
|
||||
uint64_t stride;
|
||||
pic->width = bitstream->width;
|
||||
pic->height = bitstream->height;
|
||||
if (pic->use_argb) {
|
||||
stride = (uint64_t)bitstream->width * 4;
|
||||
} else {
|
||||
stride = (uint64_t)bitstream->width * (has_alpha ? 5 : 3) / 2;
|
||||
pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
|
||||
}
|
||||
|
||||
if (!ImgIoUtilCheckSizeArgumentsOverflow(stride, bitstream->height)) {
|
||||
status = VP8_STATUS_OUT_OF_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pic->use_argb) pic->colorspace = has_alpha ? WEBP_YUV420A
|
||||
: WEBP_YUV420;
|
||||
ok = WebPPictureAlloc(pic);
|
||||
if (!ok) {
|
||||
status = VP8_STATUS_OUT_OF_MEMORY;
|
||||
break;
|
||||
}
|
||||
if (pic->use_argb) {
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
output_buffer->colorspace = MODE_ARGB;
|
||||
#else
|
||||
output_buffer->colorspace = MODE_BGRA;
|
||||
#endif
|
||||
output_buffer->u.RGBA.rgba = (uint8_t*)pic->argb;
|
||||
output_buffer->u.RGBA.stride = pic->argb_stride * sizeof(uint32_t);
|
||||
output_buffer->u.RGBA.size = output_buffer->u.RGBA.stride * pic->height;
|
||||
|
@ -34,16 +34,6 @@ else
|
||||
GL_LIBS = -lglut -lGL
|
||||
endif
|
||||
|
||||
# SDL flags: use sdl-config if it exists
|
||||
SDL_CONFIG = $(shell sdl-config --version 2> /dev/null)
|
||||
ifneq ($(SDL_CONFIG),)
|
||||
SDL_LIBS = $(shell sdl-config --libs)
|
||||
SDL_FLAGS = $(shell sdl-config --cflags)
|
||||
else
|
||||
# use best-guess
|
||||
SDL_LIBS = -lSDL
|
||||
SDL_FLAGS =
|
||||
endif
|
||||
|
||||
# To install libraries on Mac OS X:
|
||||
# 1. Install MacPorts (http://www.macports.org/install.php)
|
||||
@ -67,7 +57,7 @@ endif
|
||||
# EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES
|
||||
|
||||
# Extra flags to enable byte swap for 16 bit colorspaces.
|
||||
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1
|
||||
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP
|
||||
|
||||
# Extra flags to enable multi-threading
|
||||
EXTRA_FLAGS += -DWEBP_USE_THREAD
|
||||
@ -113,7 +103,7 @@ endif
|
||||
|
||||
AR = ar
|
||||
ARFLAGS = r
|
||||
CPPFLAGS = -I. -Isrc/ -Wall
|
||||
CPPFLAGS = -Isrc/ -Wall
|
||||
CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
|
||||
CC = gcc
|
||||
INSTALL = install
|
||||
@ -183,6 +173,9 @@ DSP_DEC_OBJS = \
|
||||
src/dsp/yuv_sse2.o \
|
||||
|
||||
DSP_ENC_OBJS = \
|
||||
src/dsp/argb.o \
|
||||
src/dsp/argb_mips_dsp_r2.o \
|
||||
src/dsp/argb_sse2.o \
|
||||
src/dsp/cost.o \
|
||||
src/dsp/cost_mips32.o \
|
||||
src/dsp/cost_mips_dsp_r2.o \
|
||||
@ -342,8 +335,7 @@ OUT_LIBS += src/libwebp.a
|
||||
EXTRA_LIB = extras/libwebpextras.a
|
||||
OUT_EXAMPLES = examples/cwebp examples/dwebp
|
||||
EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux \
|
||||
examples/anim_diff examples/anim_dump \
|
||||
examples/img2webp examples/webpinfo
|
||||
examples/anim_diff examples/img2webp examples/webpinfo
|
||||
OTHER_EXAMPLES = extras/get_disto extras/webp_quality extras/vwebp_sdl
|
||||
|
||||
OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
|
||||
@ -371,7 +363,7 @@ src/utils/bit_reader_utils.o: src/utils/endian_inl_utils.h
|
||||
src/utils/bit_writer_utils.o: src/utils/endian_inl_utils.h
|
||||
|
||||
%.o: %.c $(HDRS)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
|
||||
|
||||
examples/libanim_util.a: $(ANIM_UTIL_OBJS)
|
||||
examples/libexample_util.a: $(EX_UTIL_OBJS)
|
||||
@ -389,7 +381,6 @@ src/demux/libwebpdemux.a: $(LIBWEBPDEMUX_OBJS)
|
||||
$(AR) $(ARFLAGS) $@ $^
|
||||
|
||||
examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
|
||||
examples/anim_dump: examples/anim_dump.o $(ANIM_UTIL_OBJS)
|
||||
examples/cwebp: examples/cwebp.o
|
||||
examples/dwebp: examples/dwebp.o
|
||||
examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
|
||||
@ -403,13 +394,6 @@ examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
|
||||
examples/anim_diff: imageio/libimageio_util.a src/libwebp.a
|
||||
examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
|
||||
examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
|
||||
examples/anim_dump: examples/libanim_util.a
|
||||
examples/anim_dump: src/demux/libwebpdemux.a
|
||||
examples/anim_dump: examples/libexample_util.a
|
||||
examples/anim_dump: imageio/libimageio_util.a
|
||||
examples/anim_dump: imageio/libimageenc.a
|
||||
examples/anim_dump: src/libwebp.a
|
||||
examples/anim_dump: EXTRA_LIBS += $(GIF_LIBS) $(DWEBP_LIBS)
|
||||
examples/cwebp: examples/libexample_util.a
|
||||
examples/cwebp: imageio/libimagedec.a
|
||||
examples/cwebp: imageio/libimageio_util.a
|
||||
@ -450,8 +434,8 @@ extras/vwebp_sdl: extras/vwebp_sdl.o
|
||||
extras/vwebp_sdl: extras/webp_to_sdl.o
|
||||
extras/vwebp_sdl: imageio/libimageio_util.a
|
||||
extras/vwebp_sdl: src/libwebp.a
|
||||
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL $(SDL_FLAGS)
|
||||
extras/vwebp_sdl: EXTRA_LIBS += $(SDL_LIBS)
|
||||
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL
|
||||
extras/vwebp_sdl: EXTRA_LIBS += -lSDL
|
||||
|
||||
$(OUT_EXAMPLES) $(EXTRA_EXAMPLES) $(OTHER_EXAMPLES):
|
||||
$(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH GIF2WEBP 1 "September 20, 2017"
|
||||
.TH GIF2WEBP 1 "January 25, 2017"
|
||||
.SH NAME
|
||||
gif2webp \- Convert a GIF image to WebP
|
||||
.SH SYNOPSIS
|
||||
@ -109,9 +109,6 @@ the range of 20 to 50.
|
||||
.TP
|
||||
.B \-mt
|
||||
Use multi-threading for encoding, if possible.
|
||||
.B \-loop_compatibility
|
||||
If enabled, handle the loop information in a compatible fashion for Chrome
|
||||
version prior to M62 (inclusive) and Firefox.
|
||||
.TP
|
||||
.B \-v
|
||||
Print extra information.
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH WEBPINFO 1 "November 24, 2017"
|
||||
.TH WEBPINFO 1 "May 08, 2017"
|
||||
.SH NAME
|
||||
webpinfo \- print out the chunk level structure of WebP files
|
||||
along with basic integrity checks.
|
||||
@ -22,19 +22,16 @@ WebP format.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-version
|
||||
Print the version number (as major.minor.revision) and exit.
|
||||
.TP
|
||||
.B \-quiet
|
||||
.B -quiet
|
||||
Do not show chunk parsing information.
|
||||
.TP
|
||||
.B \-diag
|
||||
.B -diag
|
||||
Show parsing error diagnosis.
|
||||
.TP
|
||||
.B \-summary
|
||||
.B -summary
|
||||
Show chunk stats summary.
|
||||
.TP
|
||||
.BI \-bitstream_info
|
||||
.BI -bitstream_info
|
||||
Parse bitstream header.
|
||||
.TP
|
||||
.B \-h, \-help
|
||||
|
@ -22,7 +22,6 @@ commondir = $(includedir)/webp
|
||||
libwebp_la_SOURCES =
|
||||
libwebpinclude_HEADERS =
|
||||
libwebpinclude_HEADERS += webp/encode.h
|
||||
|
||||
noinst_HEADERS =
|
||||
noinst_HEADERS += webp/format_constants.h
|
||||
|
||||
@ -36,7 +35,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
|
||||
# other than the ones listed on the command line, i.e., after linking, it will
|
||||
# not have unresolved symbols. Some platforms (Windows among them) require all
|
||||
# symbols in shared libraries to be resolved at library creation.
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 7:1:0
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 7:0:0
|
||||
libwebpincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebp.pc
|
||||
|
||||
@ -48,7 +47,7 @@ if BUILD_LIBWEBPDECODER
|
||||
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
|
||||
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
|
||||
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:1:0
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:0:0
|
||||
pkgconfig_DATA += libwebpdecoder.pc
|
||||
endif
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
noinst_LTLIBRARIES = libwebpdecode.la
|
||||
|
||||
libwebpdecode_la_SOURCES =
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/utils/quant_levels_dec_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../utils/quant_levels_dec_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/format_constants.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// ALPHDecoder object.
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_DEC_ALPHAI_DEC_H_
|
||||
#define WEBP_DEC_ALPHAI_DEC_H_
|
||||
#ifndef WEBP_DEC_ALPHAI_H_
|
||||
#define WEBP_DEC_ALPHAI_H_
|
||||
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/filters_utils.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/filters_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_ALPHAI_DEC_H_ */
|
||||
#endif /* WEBP_DEC_ALPHAI_H_ */
|
||||
|
@ -13,15 +13,15 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecBuffer
|
||||
|
||||
// Number of bytes per pixel for the different color-spaces.
|
||||
static const uint8_t kModeBpp[MODE_LAST] = {
|
||||
static const int kModeBpp[MODE_LAST] = {
|
||||
3, 4, 3, 4, 4, 2, 2,
|
||||
4, 4, 4, 2, // pre-multiplied modes
|
||||
1, 1 };
|
||||
@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
|
||||
// strictly speaking, the very last (or first, if flipped) row
|
||||
// doesn't require padding.
|
||||
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
|
||||
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
|
||||
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
|
||||
|
||||
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
int ok = 1;
|
||||
@ -74,8 +74,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
} else { // RGB checks
|
||||
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
|
||||
const int stride = abs(buf->stride);
|
||||
const uint64_t size =
|
||||
MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
|
||||
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
|
||||
ok &= (size <= buf->size);
|
||||
ok &= (stride >= width * kModeBpp[mode]);
|
||||
ok &= (buf->rgba != NULL);
|
||||
@ -99,14 +98,9 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
|
||||
uint64_t uv_size = 0, a_size = 0, total_size;
|
||||
// We need memory and it hasn't been allocated yet.
|
||||
// => initialize output buffer, now that dimensions are known.
|
||||
int stride;
|
||||
uint64_t size;
|
||||
const int stride = w * kModeBpp[mode];
|
||||
const uint64_t size = (uint64_t)stride * h;
|
||||
|
||||
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
stride = w * kModeBpp[mode];
|
||||
size = (uint64_t)stride * h;
|
||||
if (!WebPIsRGBMode(mode)) {
|
||||
uv_stride = (w + 1) / 2;
|
||||
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
|
||||
@ -175,11 +169,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
|
||||
return VP8_STATUS_OK;
|
||||
}
|
||||
|
||||
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
|
||||
VP8StatusCode WebPAllocateDecBuffer(int w, int h,
|
||||
const WebPDecoderOptions* const options,
|
||||
WebPDecBuffer* const buffer) {
|
||||
WebPDecBuffer* const out) {
|
||||
VP8StatusCode status;
|
||||
if (buffer == NULL || width <= 0 || height <= 0) {
|
||||
if (out == NULL || w <= 0 || h <= 0) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
if (options != NULL) { // First, apply options if there is any.
|
||||
@ -188,39 +182,33 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
|
||||
const int ch = options->crop_height;
|
||||
const int x = options->crop_left & ~1;
|
||||
const int y = options->crop_top & ~1;
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
|
||||
x + cw > width || y + ch > height) {
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
|
||||
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
|
||||
}
|
||||
width = cw;
|
||||
height = ch;
|
||||
w = cw;
|
||||
h = ch;
|
||||
}
|
||||
|
||||
if (options->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
int scaled_width = options->scaled_width;
|
||||
int scaled_height = options->scaled_height;
|
||||
if (!WebPRescalerGetScaledDimensions(
|
||||
width, height, &scaled_width, &scaled_height)) {
|
||||
w, h, &scaled_width, &scaled_height)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
width = scaled_width;
|
||||
height = scaled_height;
|
||||
#else
|
||||
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
|
||||
#endif
|
||||
w = scaled_width;
|
||||
h = scaled_height;
|
||||
}
|
||||
}
|
||||
buffer->width = width;
|
||||
buffer->height = height;
|
||||
out->width = w;
|
||||
out->height = h;
|
||||
|
||||
// Then, allocate buffer for real.
|
||||
status = AllocateBuffer(buffer);
|
||||
status = AllocateBuffer(out);
|
||||
if (status != VP8_STATUS_OK) return status;
|
||||
|
||||
// Use the stride trick if vertical flip is needed.
|
||||
if (options != NULL && options->flip) {
|
||||
status = WebPFlipBuffer(buffer);
|
||||
status = WebPFlipBuffer(out);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_COMMON_DEC_H_
|
||||
#define WEBP_DEC_COMMON_DEC_H_
|
||||
#ifndef WEBP_DEC_COMMON_H_
|
||||
#define WEBP_DEC_COMMON_H_
|
||||
|
||||
// intra prediction modes
|
||||
enum { B_DC_PRED = 0, // 4x4 modes
|
||||
@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
||||
NUM_PROBAS = 11
|
||||
};
|
||||
|
||||
#endif // WEBP_DEC_COMMON_DEC_H_
|
||||
#endif // WEBP_DEC_COMMON_H_
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main reconstruction function.
|
||||
|
||||
static const uint16_t kScan[16] = {
|
||||
static const int kScan[16] = {
|
||||
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
|
||||
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
|
||||
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
|
||||
@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||
#define MIN_DITHER_AMP 4
|
||||
|
||||
#define DITHER_AMP_TAB_SIZE 12
|
||||
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
// roughly, it's dqm->uv_mat_[1]
|
||||
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
|
||||
};
|
||||
@ -728,7 +728,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
}
|
||||
|
||||
mem = (uint8_t*)dec->mem_;
|
||||
dec->intra_t_ = mem;
|
||||
dec->intra_t_ = (uint8_t*)mem;
|
||||
mem += intra_pred_mode_size;
|
||||
|
||||
dec->yuv_t_ = (VP8TopSamples*)mem;
|
||||
@ -750,7 +750,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
|
||||
mem = (uint8_t*)WEBP_ALIGN(mem);
|
||||
assert((yuv_size & WEBP_ALIGN_CST) == 0);
|
||||
dec->yuv_b_ = mem;
|
||||
dec->yuv_b_ = (uint8_t*)mem;
|
||||
mem += yuv_size;
|
||||
|
||||
dec->mb_data_ = (VP8MBData*)mem;
|
||||
@ -766,7 +766,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
const int extra_rows = kFilterExtraRows[dec->filter_type_];
|
||||
const int extra_y = extra_rows * dec->cache_y_stride_;
|
||||
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
|
||||
dec->cache_y_ = mem + extra_y;
|
||||
dec->cache_y_ = ((uint8_t*)mem) + extra_y;
|
||||
dec->cache_u_ = dec->cache_y_
|
||||
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
|
||||
dec->cache_v_ = dec->cache_u_
|
||||
@ -776,7 +776,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
mem += cache_size;
|
||||
|
||||
// alpha plane
|
||||
dec->alpha_plane_ = alpha_size ? mem : NULL;
|
||||
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
|
||||
mem += alpha_size;
|
||||
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
|
||||
|
||||
|
@ -15,10 +15,10 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./alphai_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
// In append mode, buffer allocations increase as multiples of this value.
|
||||
// Needs to be a power of 2.
|
||||
@ -283,8 +283,10 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
|
||||
|
||||
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
|
||||
if (idec->state_ == STATE_VP8_DATA) {
|
||||
// Synchronize the thread, clean-up and check for errors.
|
||||
VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
|
||||
VP8Io* const io = &idec->io_;
|
||||
if (io->teardown != NULL) {
|
||||
io->teardown(io);
|
||||
}
|
||||
}
|
||||
idec->state_ = STATE_ERROR;
|
||||
return error;
|
||||
@ -449,10 +451,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
|
||||
VP8Io* const io = &idec->io_;
|
||||
|
||||
// Make sure partition #0 has been read before, to set dec to ready_.
|
||||
if (!dec->ready_) {
|
||||
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
assert(dec->ready_);
|
||||
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
|
||||
if (idec->last_mb_y_ != dec->mb_y_) {
|
||||
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
|
||||
@ -492,7 +491,6 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
}
|
||||
// Synchronize the thread and check for errors.
|
||||
if (!VP8ExitCritical(dec, io)) {
|
||||
idec->state_ = STATE_ERROR; // prevent re-entry in IDecError
|
||||
return IDecError(idec, VP8_STATUS_USER_ABORT);
|
||||
}
|
||||
dec->ready_ = 0;
|
||||
@ -573,10 +571,6 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
|
||||
status = DecodePartition0(idec);
|
||||
}
|
||||
if (idec->state_ == STATE_VP8_DATA) {
|
||||
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
|
||||
if (dec == NULL) {
|
||||
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
|
||||
}
|
||||
status = DecodeRemaining(idec);
|
||||
}
|
||||
if (idec->state_ == STATE_VP8L_HEADER) {
|
||||
@ -679,12 +673,12 @@ void WebPIDelete(WebPIDecoder* idec) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Wrapper toward WebPINewDecoder
|
||||
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
|
||||
size_t output_buffer_size, int output_stride) {
|
||||
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
|
||||
WebPIDecoder* idec;
|
||||
|
||||
if (csp >= MODE_YUV) return NULL;
|
||||
if (mode >= MODE_YUV) return NULL;
|
||||
if (is_external_memory == 0) { // Overwrite parameters to sane values.
|
||||
output_buffer_size = 0;
|
||||
output_stride = 0;
|
||||
@ -695,7 +689,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
|
||||
}
|
||||
idec = WebPINewDecoder(NULL);
|
||||
if (idec == NULL) return NULL;
|
||||
idec->output_.colorspace = csp;
|
||||
idec->output_.colorspace = mode;
|
||||
idec->output_.is_external_memory = is_external_memory;
|
||||
idec->output_.u.RGBA.rgba = output_buffer;
|
||||
idec->output_.u.RGBA.stride = output_stride;
|
||||
|
@ -13,11 +13,11 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/yuv.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main YUV<->RGB conversion functions
|
||||
@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
int num_rows;
|
||||
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@ -241,7 +241,6 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
//------------------------------------------------------------------------------
|
||||
// YUV rescaling (no final RGB conversion needed)
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int Rescale(const uint8_t* src, int src_stride,
|
||||
int new_lines, WebPRescaler* const wrk) {
|
||||
int num_lines_out = 0;
|
||||
@ -432,7 +431,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
|
||||
int max_lines_out) {
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@ -542,8 +541,6 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Default custom functions
|
||||
|
||||
@ -564,14 +561,10 @@ static int CustomSetup(VP8Io* io) {
|
||||
WebPInitUpsamplers();
|
||||
}
|
||||
if (io->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
|
||||
if (!ok) {
|
||||
return 0; // memory error
|
||||
}
|
||||
#else
|
||||
return 0; // rescaling support not compiled
|
||||
#endif
|
||||
} else {
|
||||
if (is_rgb) {
|
||||
WebPInitSamplers();
|
||||
@ -605,6 +598,9 @@ static int CustomSetup(VP8Io* io) {
|
||||
}
|
||||
}
|
||||
|
||||
if (is_rgb) {
|
||||
VP8YUVInit();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
|
||||
static WEBP_INLINE int clip(int v, int M) {
|
||||
return v < 0 ? 0 : v > M ? M : v;
|
||||
|
@ -11,19 +11,15 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
#define USE_GENERIC_TREE 0
|
||||
#define USE_GENERIC_TREE
|
||||
#endif
|
||||
#endif // USE_GENERIC_TREE
|
||||
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
#ifdef USE_GENERIC_TREE
|
||||
static const int8_t kYModesIntra4[18] = {
|
||||
-B_DC_PRED, 1,
|
||||
-B_TM_PRED, 2,
|
||||
@ -321,7 +317,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
int x;
|
||||
for (x = 0; x < 4; ++x) {
|
||||
const uint8_t* const prob = kBModesProba[top[x]][ymode];
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
#ifdef USE_GENERIC_TREE
|
||||
// Generic tree-parsing
|
||||
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
|
||||
while (i > 0) {
|
||||
@ -339,7 +335,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
(!VP8GetBit(br, prob[6]) ? B_LD_PRED :
|
||||
(!VP8GetBit(br, prob[7]) ? B_VL_PRED :
|
||||
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
|
||||
#endif // USE_GENERIC_TREE
|
||||
#endif // USE_GENERIC_TREE
|
||||
top[x] = ymode;
|
||||
}
|
||||
memcpy(modes, top, 4 * sizeof(*top));
|
||||
@ -502,7 +498,7 @@ static const uint8_t
|
||||
|
||||
// Paragraph 9.9
|
||||
|
||||
static const uint8_t kBands[16 + 1] = {
|
||||
static const int kBands[16 + 1] = {
|
||||
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
0 // extra entry as sentinel
|
||||
};
|
||||
|
@ -13,12 +13,12 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -11,10 +11,10 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8_DEC_H_
|
||||
#define WEBP_DEC_VP8_DEC_H_
|
||||
#ifndef WEBP_WEBP_DECODE_VP8_H_
|
||||
#define WEBP_WEBP_DECODE_VP8_H_
|
||||
|
||||
#include "src/webp/decode.h"
|
||||
#include "../webp/decode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
|
||||
// Miscellaneous VP8/VP8L bitstream probing functions.
|
||||
|
||||
// Returns true if the next 3 bytes in data contain the VP8 signature.
|
||||
WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
|
||||
// Validates the VP8 data-header and retrieves basic header information viz
|
||||
// width and height. Returns 0 in case of formatting error. *width/*height
|
||||
// can be passed NULL.
|
||||
WEBP_EXTERN int VP8GetInfo(
|
||||
WEBP_EXTERN(int) VP8GetInfo(
|
||||
const uint8_t* data,
|
||||
size_t data_size, // data available so far
|
||||
size_t chunk_size, // total data size expected in the chunk
|
||||
int* const width, int* const height);
|
||||
|
||||
// Returns true if the next byte(s) in data is a VP8L signature.
|
||||
WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
|
||||
// Validates the VP8L data-header and retrieves basic header information viz
|
||||
// width, height and alpha. Returns 0 in case of formatting error.
|
||||
// width/height/has_alpha can be passed NULL.
|
||||
WEBP_EXTERN int VP8LGetInfo(
|
||||
WEBP_EXTERN(int) VP8LGetInfo(
|
||||
const uint8_t* data, size_t data_size, // data available so far
|
||||
int* const width, int* const height, int* const has_alpha);
|
||||
|
||||
@ -182,4 +182,4 @@ WEBP_EXTERN int VP8LGetInfo(
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8_DEC_H_ */
|
||||
#endif /* WEBP_WEBP_DECODE_VP8_H_ */
|
||||
|
@ -11,16 +11,16 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8I_DEC_H_
|
||||
#define WEBP_DEC_VP8I_DEC_H_
|
||||
#ifndef WEBP_DEC_VP8I_H_
|
||||
#define WEBP_DEC_VP8I_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "src/dec/common_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/bit_reader_utils.h"
|
||||
#include "src/utils/random_utils.h"
|
||||
#include "src/utils/thread_utils.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./common_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/random_utils.h"
|
||||
#include "../utils/thread_utils.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -32,7 +32,7 @@ extern "C" {
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 0
|
||||
#define DEC_MIN_VERSION 6
|
||||
#define DEC_REV_VERSION 1
|
||||
#define DEC_REV_VERSION 0
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||
@ -57,6 +57,7 @@ extern "C" {
|
||||
// '|' = left sample, '-' = top sample, '+' = top-left sample
|
||||
// 't' = extra top-right sample for 4x4 modes
|
||||
#define YUV_SIZE (BPS * 17 + BPS * 9)
|
||||
#define Y_SIZE (BPS * 17)
|
||||
#define Y_OFF (BPS * 1 + 8)
|
||||
#define U_OFF (Y_OFF + BPS * 16 + BPS)
|
||||
#define V_OFF (U_OFF + 16)
|
||||
@ -316,4 +317,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8I_DEC_H_ */
|
||||
#endif /* WEBP_DEC_VP8I_H_ */
|
||||
|
@ -14,22 +14,22 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/lossless_common.h"
|
||||
#include "../dsp/yuv.h"
|
||||
#include "../utils/endian_inl_utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
#define NUM_ARGB_CACHE_ROWS 16
|
||||
|
||||
static const int kCodeLengthLiterals = 16;
|
||||
static const int kCodeLengthRepeatCode = 16;
|
||||
static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Five Huffman codes are used at each meta code:
|
||||
@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
|
||||
// All values computed for 8-bit first level lookup with Mark Adler's tool:
|
||||
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
|
||||
#define FIXED_TABLE_SIZE (630 * 3 + 410)
|
||||
static const uint16_t kTableSize[12] = {
|
||||
static const int kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 654,
|
||||
FIXED_TABLE_SIZE + 656,
|
||||
FIXED_TABLE_SIZE + 658,
|
||||
@ -253,11 +253,11 @@ static int ReadHuffmanCodeLengths(
|
||||
int symbol;
|
||||
int max_symbol;
|
||||
int prev_code_len = DEFAULT_CODE_LENGTH;
|
||||
HuffmanTables tables;
|
||||
HuffmanCode table[1 << LENGTHS_TABLE_BITS];
|
||||
|
||||
if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
|
||||
!VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS,
|
||||
code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
|
||||
if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
|
||||
code_length_code_lengths,
|
||||
NUM_CODE_LENGTH_CODES)) {
|
||||
goto End;
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ static int ReadHuffmanCodeLengths(
|
||||
int code_len;
|
||||
if (max_symbol-- == 0) break;
|
||||
VP8LFillBitWindow(br);
|
||||
p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
|
||||
p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
|
||||
VP8LSetBitPos(br, br->bit_pos_ + p->bits);
|
||||
code_len = p->value;
|
||||
if (code_len < kCodeLengthLiterals) {
|
||||
@ -300,7 +300,6 @@ static int ReadHuffmanCodeLengths(
|
||||
ok = 1;
|
||||
|
||||
End:
|
||||
VP8LHuffmanTablesDeallocate(&tables);
|
||||
if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
return ok;
|
||||
}
|
||||
@ -308,8 +307,7 @@ static int ReadHuffmanCodeLengths(
|
||||
// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
|
||||
// tree.
|
||||
static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
|
||||
int* const code_lengths,
|
||||
HuffmanTables* const table) {
|
||||
int* const code_lengths, HuffmanCode* const table) {
|
||||
int ok = 0;
|
||||
int size = 0;
|
||||
VP8LBitReader* const br = &dec->br_;
|
||||
@ -364,18 +362,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
VP8LMetadata* const hdr = &dec->hdr_;
|
||||
uint32_t* huffman_image = NULL;
|
||||
HTreeGroup* htree_groups = NULL;
|
||||
HuffmanTables* huffman_tables = &hdr->huffman_tables_;
|
||||
HuffmanCode* huffman_tables = NULL;
|
||||
HuffmanCode* next = NULL;
|
||||
int num_htree_groups = 1;
|
||||
int num_htree_groups_max = 1;
|
||||
int max_alphabet_size = 0;
|
||||
int* code_lengths = NULL;
|
||||
const int table_size = kTableSize[color_cache_bits];
|
||||
int* mapping = NULL;
|
||||
int ok = 0;
|
||||
|
||||
// Check the table has been 0 initialized (through InitMetadata).
|
||||
assert(huffman_tables->root.start == NULL);
|
||||
assert(huffman_tables->curr_segment == NULL);
|
||||
|
||||
if (allow_recursion && VP8LReadBits(br, 1)) {
|
||||
// use meta Huffman codes.
|
||||
@ -392,36 +384,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
// The huffman data is stored in red and green bytes.
|
||||
const int group = (huffman_image[i] >> 8) & 0xffff;
|
||||
huffman_image[i] = group;
|
||||
if (group >= num_htree_groups_max) {
|
||||
num_htree_groups_max = group + 1;
|
||||
if (group >= num_htree_groups) {
|
||||
num_htree_groups = group + 1;
|
||||
}
|
||||
}
|
||||
// Check the validity of num_htree_groups_max. If it seems too big, use a
|
||||
// smaller value for later. This will prevent big memory allocations to end
|
||||
// up with a bad bitstream anyway.
|
||||
// The value of 1000 is totally arbitrary. We know that num_htree_groups_max
|
||||
// is smaller than (1 << 16) and should be smaller than the number of pixels
|
||||
// (though the format allows it to be bigger).
|
||||
if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
|
||||
// Create a mapping from the used indices to the minimal set of used
|
||||
// values [0, num_htree_groups)
|
||||
mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
|
||||
if (mapping == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
goto Error;
|
||||
}
|
||||
// -1 means a value is unmapped, and therefore unused in the Huffman
|
||||
// image.
|
||||
memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
|
||||
for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
|
||||
// Get the current mapping for the group and remap the Huffman image.
|
||||
int* const mapped_group = &mapping[huffman_image[i]];
|
||||
if (*mapped_group == -1) *mapped_group = num_htree_groups++;
|
||||
huffman_image[i] = *mapped_group;
|
||||
}
|
||||
} else {
|
||||
num_htree_groups = num_htree_groups_max;
|
||||
}
|
||||
}
|
||||
|
||||
if (br->eos_) goto Error;
|
||||
@ -437,105 +403,88 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
}
|
||||
}
|
||||
|
||||
huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
|
||||
sizeof(*huffman_tables));
|
||||
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
|
||||
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
|
||||
sizeof(*code_lengths));
|
||||
|
||||
if (htree_groups == NULL || code_lengths == NULL ||
|
||||
!VP8LHuffmanTablesAllocate(num_htree_groups * table_size,
|
||||
huffman_tables)) {
|
||||
if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
goto Error;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_htree_groups_max; ++i) {
|
||||
// If the index "i" is unused in the Huffman image, just make sure the
|
||||
// coefficients are valid but do not store them.
|
||||
if (mapping != NULL && mapping[i] == -1) {
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += (1 << color_cache_bits);
|
||||
}
|
||||
// Passing in NULL so that nothing gets filled.
|
||||
if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, NULL)) {
|
||||
goto Error;
|
||||
}
|
||||
next = huffman_tables;
|
||||
for (i = 0; i < num_htree_groups; ++i) {
|
||||
HTreeGroup* const htree_group = &htree_groups[i];
|
||||
HuffmanCode** const htrees = htree_group->htrees;
|
||||
int size;
|
||||
int total_size = 0;
|
||||
int is_trivial_literal = 1;
|
||||
int max_bits = 0;
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
htrees[j] = next;
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += 1 << color_cache_bits;
|
||||
}
|
||||
} else {
|
||||
HTreeGroup* const htree_group =
|
||||
&htree_groups[(mapping == NULL) ? i : mapping[i]];
|
||||
HuffmanCode** const htrees = htree_group->htrees;
|
||||
int size;
|
||||
int total_size = 0;
|
||||
int is_trivial_literal = 1;
|
||||
int max_bits = 0;
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += (1 << color_cache_bits);
|
||||
}
|
||||
size =
|
||||
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
|
||||
htrees[j] = huffman_tables->curr_segment->curr_table;
|
||||
if (size == 0) {
|
||||
goto Error;
|
||||
}
|
||||
if (is_trivial_literal && kLiteralMap[j] == 1) {
|
||||
is_trivial_literal = (htrees[j]->bits == 0);
|
||||
}
|
||||
total_size += htrees[j]->bits;
|
||||
huffman_tables->curr_segment->curr_table += size;
|
||||
if (j <= ALPHA) {
|
||||
int local_max_bits = code_lengths[0];
|
||||
int k;
|
||||
for (k = 1; k < alphabet_size; ++k) {
|
||||
if (code_lengths[k] > local_max_bits) {
|
||||
local_max_bits = code_lengths[k];
|
||||
}
|
||||
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
|
||||
if (size == 0) {
|
||||
goto Error;
|
||||
}
|
||||
if (is_trivial_literal && kLiteralMap[j] == 1) {
|
||||
is_trivial_literal = (next->bits == 0);
|
||||
}
|
||||
total_size += next->bits;
|
||||
next += size;
|
||||
if (j <= ALPHA) {
|
||||
int local_max_bits = code_lengths[0];
|
||||
int k;
|
||||
for (k = 1; k < alphabet_size; ++k) {
|
||||
if (code_lengths[k] > local_max_bits) {
|
||||
local_max_bits = code_lengths[k];
|
||||
}
|
||||
max_bits += local_max_bits;
|
||||
}
|
||||
max_bits += local_max_bits;
|
||||
}
|
||||
htree_group->is_trivial_literal = is_trivial_literal;
|
||||
htree_group->is_trivial_code = 0;
|
||||
if (is_trivial_literal) {
|
||||
const int red = htrees[RED][0].value;
|
||||
const int blue = htrees[BLUE][0].value;
|
||||
const int alpha = htrees[ALPHA][0].value;
|
||||
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
|
||||
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
|
||||
htree_group->is_trivial_code = 1;
|
||||
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
|
||||
}
|
||||
}
|
||||
htree_group->use_packed_table =
|
||||
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
|
||||
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
|
||||
}
|
||||
htree_group->is_trivial_literal = is_trivial_literal;
|
||||
htree_group->is_trivial_code = 0;
|
||||
if (is_trivial_literal) {
|
||||
const int red = htrees[RED][0].value;
|
||||
const int blue = htrees[BLUE][0].value;
|
||||
const int alpha = htrees[ALPHA][0].value;
|
||||
htree_group->literal_arb =
|
||||
((uint32_t)alpha << 24) | (red << 16) | blue;
|
||||
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
|
||||
htree_group->is_trivial_code = 1;
|
||||
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
|
||||
}
|
||||
}
|
||||
htree_group->use_packed_table = !htree_group->is_trivial_code &&
|
||||
(max_bits < HUFFMAN_PACKED_BITS);
|
||||
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
|
||||
}
|
||||
ok = 1;
|
||||
WebPSafeFree(code_lengths);
|
||||
|
||||
// All OK. Finalize pointers.
|
||||
// All OK. Finalize pointers and return.
|
||||
hdr->huffman_image_ = huffman_image;
|
||||
hdr->num_htree_groups_ = num_htree_groups;
|
||||
hdr->htree_groups_ = htree_groups;
|
||||
hdr->huffman_tables_ = huffman_tables;
|
||||
return 1;
|
||||
|
||||
Error:
|
||||
WebPSafeFree(code_lengths);
|
||||
WebPSafeFree(mapping);
|
||||
if (!ok) {
|
||||
WebPSafeFree(huffman_image);
|
||||
VP8LHuffmanTablesDeallocate(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
}
|
||||
return ok;
|
||||
WebPSafeFree(huffman_image);
|
||||
WebPSafeFree(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
return 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scaling.
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
const int num_channels = 4;
|
||||
const int in_width = io->mb_w;
|
||||
@ -567,13 +516,10 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
out_width, out_height, 0, num_channels, work);
|
||||
return 1;
|
||||
}
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Export to ARGB
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
// We have special "export" function since we need to convert from BGRA
|
||||
static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
|
||||
int rgba_stride, uint8_t* const rgba) {
|
||||
@ -615,8 +561,6 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
|
||||
return num_lines_out;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
// Emit rows without any scaling.
|
||||
static int EmitRows(WEBP_CSP_MODE colorspace,
|
||||
const uint8_t* row_in, int in_stride,
|
||||
@ -802,12 +746,9 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
|
||||
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
|
||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
|
||||
const int num_rows_out =
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
io->use_scaling ?
|
||||
const int num_rows_out = io->use_scaling ?
|
||||
EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
|
||||
rgba, buf->stride) :
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
EmitRows(output->colorspace, rows_data, in_stride,
|
||||
io->mb_w, io->mb_h, rgba, buf->stride);
|
||||
// Update 'last_out_row_'.
|
||||
@ -934,11 +875,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
|
||||
#endif
|
||||
break;
|
||||
case 2:
|
||||
#if !defined(WORDS_BIGENDIAN)
|
||||
memcpy(&pattern, src, sizeof(uint16_t));
|
||||
#else
|
||||
pattern = ((uint32_t)src[0] << 8) | src[1];
|
||||
#endif
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
pattern |= pattern << 16;
|
||||
#elif defined(WEBP_USE_MIPS_DSP_R2)
|
||||
@ -1237,20 +1174,9 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
}
|
||||
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
// In incremental decoding:
|
||||
// br->eos_ && src < src_last: if 'br' reached the end of the buffer and
|
||||
// 'src_last' has not been reached yet, there is not enough data. 'dec' has to
|
||||
// be reset until there is more data.
|
||||
// !br->eos_ && src < src_last: this cannot happen as either the buffer is
|
||||
// fully read, either enough has been read to reach 'src_last'.
|
||||
// src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go
|
||||
// beyond 'src_last' in case the image is cropped and an LZ77 goes further.
|
||||
// The buffer might have been enough or there is some left. 'br->eos_' does
|
||||
// not matter.
|
||||
assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last);
|
||||
if (dec->incremental_ && br->eos_ && src < src_last) {
|
||||
if (dec->incremental_ && br->eos_ && src < src_end) {
|
||||
RestoreState(dec);
|
||||
} else if ((dec->incremental_ && src >= src_last) || !br->eos_) {
|
||||
} else if (!br->eos_) {
|
||||
// Process the remaining rows corresponding to last row-block.
|
||||
if (process_func != NULL) {
|
||||
process_func(dec, row > last_row ? last_row : row);
|
||||
@ -1369,7 +1295,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
|
||||
assert(hdr != NULL);
|
||||
|
||||
WebPSafeFree(hdr->huffman_image_);
|
||||
VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
|
||||
WebPSafeFree(hdr->huffman_tables_);
|
||||
VP8LHtreeGroupsFree(hdr->htree_groups_);
|
||||
VP8LColorCacheClear(&hdr->color_cache_);
|
||||
VP8LColorCacheClear(&hdr->saved_color_cache_);
|
||||
@ -1685,7 +1611,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
// Sanity checks.
|
||||
if (dec == NULL) return 0;
|
||||
|
||||
assert(dec->hdr_.huffman_tables_.root.start != NULL);
|
||||
assert(dec->hdr_.huffman_tables_ != NULL);
|
||||
assert(dec->hdr_.htree_groups_ != NULL);
|
||||
assert(dec->hdr_.num_htree_groups_ > 0);
|
||||
|
||||
@ -1706,19 +1632,12 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
|
||||
if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
|
||||
|
||||
if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
|
||||
// need the alpha-multiply functions for premultiplied output or rescaling
|
||||
WebPInitAlphaProcessing();
|
||||
}
|
||||
#else
|
||||
if (io->use_scaling) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
goto Err;
|
||||
}
|
||||
#endif
|
||||
if (!WebPIsRGBMode(dec->output_->colorspace)) {
|
||||
WebPInitConvertARGBToYUV();
|
||||
if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
|
||||
|
@ -12,14 +12,14 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
// Vikas Arora(vikaas.arora@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8LI_DEC_H_
|
||||
#define WEBP_DEC_VP8LI_DEC_H_
|
||||
#ifndef WEBP_DEC_VP8LI_H_
|
||||
#define WEBP_DEC_VP8LI_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/bit_reader_utils.h"
|
||||
#include "src/utils/color_cache_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/color_cache_utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -51,7 +51,7 @@ typedef struct {
|
||||
uint32_t *huffman_image_;
|
||||
int num_htree_groups_;
|
||||
HTreeGroup *htree_groups_;
|
||||
HuffmanTables huffman_tables_;
|
||||
HuffmanCode *huffman_tables_;
|
||||
} VP8LMetadata;
|
||||
|
||||
typedef struct VP8LDecoder VP8LDecoder;
|
||||
@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8LI_DEC_H_ */
|
||||
#endif /* WEBP_DEC_VP8LI_H_ */
|
||||
|
@ -13,11 +13,11 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/mux_types.h" // ALPHA_FLAG
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/mux_types.h" // ALPHA_FLAG
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RIFF layout is:
|
||||
@ -421,9 +421,7 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
|
||||
NULL, NULL, NULL, &has_animation,
|
||||
NULL, headers);
|
||||
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
|
||||
// The WebPDemux API + libwebp can be used to decode individual
|
||||
// uncomposited frames or the WebPAnimDecoder can be used to fully
|
||||
// reconstruct them (see webp/demux.h).
|
||||
// TODO(jzern): full support of animation frames will require API additions.
|
||||
if (has_animation) {
|
||||
status = VP8_STATUS_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#ifndef WEBP_DEC_WEBPI_DEC_H_
|
||||
#define WEBP_DEC_WEBPI_DEC_H_
|
||||
#ifndef WEBP_DEC_WEBPI_H_
|
||||
#define WEBP_DEC_WEBPI_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "src/utils/rescaler_utils.h"
|
||||
#include "src/dec/vp8_dec.h"
|
||||
#include "../utils/rescaler_utils.h"
|
||||
#include "./vp8_dec.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecParams: Decoding output parameters. Transient internal object.
|
||||
@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_WEBPI_DEC_H_ */
|
||||
#endif /* WEBP_DEC_WEBPI_H_ */
|
||||
|
@ -1,4 +1,3 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
lib_LTLIBRARIES = libwebpdemux.la
|
||||
|
||||
libwebpdemux_la_SOURCES =
|
||||
@ -10,6 +9,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
|
||||
libwebpdemuxinclude_HEADERS += ../webp/types.h
|
||||
|
||||
libwebpdemux_la_LIBADD = ../libwebp.la
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:3:0
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:2:0
|
||||
libwebpdemuxincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebpdemux.pc
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#include "../webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h"
|
||||
#include "src/webp/demux.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/decode.h"
|
||||
#include "../webp/demux.h"
|
||||
|
||||
#define NUM_CHANNELS 4
|
||||
|
||||
|
@ -11,21 +11,21 @@
|
||||
//
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#include "../webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h" // WebPGetFeatures
|
||||
#include "src/webp/demux.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/decode.h" // WebPGetFeatures
|
||||
#include "../webp/demux.h"
|
||||
#include "../webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 0
|
||||
#define DMUX_MIN_VERSION 3
|
||||
#define DMUX_REV_VERSION 3
|
||||
#define DMUX_REV_VERSION 2
|
||||
|
||||
typedef struct {
|
||||
size_t start_; // start location of the data
|
||||
@ -205,14 +205,12 @@ static void SetFrameInfo(size_t start_offset, size_t size,
|
||||
frame->complete_ = complete;
|
||||
}
|
||||
|
||||
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
|
||||
// requirement, it may be zero.
|
||||
// Store image bearing chunks to 'frame'.
|
||||
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
|
||||
MemBuffer* const mem, Frame* const frame) {
|
||||
int alpha_chunks = 0;
|
||||
int image_chunks = 0;
|
||||
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
|
||||
MemDataSize(mem) < min_size);
|
||||
int done = (MemDataSize(mem) < min_size);
|
||||
ParseStatus status = PARSE_OK;
|
||||
|
||||
if (done) return PARSE_NEED_MORE_DATA;
|
||||
@ -403,9 +401,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
|
||||
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
|
||||
if (frame == NULL) return PARSE_ERROR;
|
||||
|
||||
// For the single image case we allow parsing of a partial frame, so no
|
||||
// minimum size is imposed here.
|
||||
status = StoreFrame(1, 0, &dmux->mem_, frame);
|
||||
// For the single image case we allow parsing of a partial frame, but we need
|
||||
// at least CHUNK_HEADER_SIZE for parsing.
|
||||
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
|
||||
if (status != PARSE_ERROR) {
|
||||
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
|
||||
// Clear any alpha when the alpha flag is missing.
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 0,3,0,3
|
||||
PRODUCTVERSION 0,3,0,3
|
||||
FILEVERSION 0,3,0,2
|
||||
PRODUCTVERSION 0,3,0,2
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libwebpdemux DLL"
|
||||
VALUE "FileVersion", "0.3.3"
|
||||
VALUE "FileVersion", "0.3.2"
|
||||
VALUE "InternalName", "libwebpdemux.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2017"
|
||||
VALUE "OriginalFilename", "libwebpdemux.dll"
|
||||
VALUE "ProductName", "WebP Image Demuxer"
|
||||
VALUE "ProductVersion", "0.3.3"
|
||||
VALUE "ProductVersion", "0.3.2"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -1,15 +1,9 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
noinst_LTLIBRARIES =
|
||||
noinst_LTLIBRARIES += libwebpdsp.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_msa.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_msa.la
|
||||
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_neon.la libwebpdspdecode_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_msa.la libwebpdspdecode_msa.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_wasm.la
|
||||
|
||||
if BUILD_LIBWEBPDECODER
|
||||
noinst_LTLIBRARIES += libwebpdspdecode.la
|
||||
@ -46,6 +40,8 @@ COMMON_SOURCES += yuv_mips32.c
|
||||
COMMON_SOURCES += yuv_mips_dsp_r2.c
|
||||
|
||||
ENC_SOURCES =
|
||||
ENC_SOURCES += argb.c
|
||||
ENC_SOURCES += argb_mips_dsp_r2.c
|
||||
ENC_SOURCES += cost.c
|
||||
ENC_SOURCES += cost_mips32.c
|
||||
ENC_SOURCES += cost_mips_dsp_r2.c
|
||||
@ -101,7 +97,12 @@ libwebpdspdecode_msa_la_SOURCES += upsampling_msa.c
|
||||
libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_msa_la_CPPFLAGS)
|
||||
libwebpdspdecode_msa_la_CFLAGS = $(libwebpdsp_msa_la_CFLAGS)
|
||||
|
||||
# WASM is not fully integrated into configure; the addition here keeps source
|
||||
# extraction by cmake simple.
|
||||
libwebpdspdecode_wasm_la_SOURCES = dec_wasm.c
|
||||
|
||||
libwebpdsp_sse2_la_SOURCES =
|
||||
libwebpdsp_sse2_la_SOURCES += argb_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += cost_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += enc_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
|
||||
@ -142,8 +143,7 @@ libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
|
||||
libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
|
||||
libwebpdsp_la_LDFLAGS = -lm
|
||||
libwebpdsp_la_LIBADD =
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_msa.la
|
||||
|
@ -12,13 +12,10 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
// Tables can be faster on some platform but incur some extra binary size (~2k).
|
||||
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
|
||||
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
// #define USE_TABLES_FOR_ALPHA_MULT
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@ -32,7 +29,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
|
||||
return v;
|
||||
}
|
||||
|
||||
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
|
||||
#ifdef USE_TABLES_FOR_ALPHA_MULT
|
||||
|
||||
static const uint32_t kMultTables[2][256] = {
|
||||
{ // (255u << MFIX) / alpha
|
||||
@ -135,9 +132,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
|
||||
return inverse ? (255u << MFIX) / a : a * KINV_255;
|
||||
}
|
||||
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t argb = ptr[x];
|
||||
@ -157,8 +154,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t a = alpha[x];
|
||||
@ -220,9 +217,8 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
|
||||
#endif
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
while (h-- > 0) {
|
||||
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
|
||||
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
|
||||
@ -239,7 +235,6 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
|
||||
rgba += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
@ -259,9 +254,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
|
||||
return (x * m) >> 16;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
while (h-- > 0) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
@ -280,16 +275,15 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
|
||||
}
|
||||
#undef MULTIPLIER
|
||||
|
||||
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
|
||||
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
|
||||
#else
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
@ -344,46 +338,6 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int HasAlpha8b_C(const uint8_t* src, int length) {
|
||||
while (length-- > 0) if (*src++ != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_C(const uint8_t* src, int length) {
|
||||
int x;
|
||||
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple channel manipulations.
|
||||
|
||||
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
||||
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
|
||||
}
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int i, offset = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
|
||||
offset += step;
|
||||
}
|
||||
}
|
||||
|
||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||
@ -391,15 +345,6 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int, uint32_t*);
|
||||
#endif
|
||||
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init function
|
||||
@ -415,24 +360,15 @@ static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPMultARGBRow = WebPMultARGBRow_C;
|
||||
WebPMultRow = WebPMultRow_C;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
|
||||
WebPMultARGBRow = WebPMultARGBRowC;
|
||||
WebPMultRow = WebPMultRowC;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
WebPPackARGB = PackARGB_C;
|
||||
#endif
|
||||
WebPPackRGB = PackRGB_C;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
|
||||
WebPDispatchAlpha = DispatchAlpha_C;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
|
||||
WebPExtractAlpha = ExtractAlpha_C;
|
||||
WebPExtractGreen = ExtractGreen_C;
|
||||
#endif
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_C;
|
||||
WebPHasAlpha32b = HasAlpha32b_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -446,34 +382,16 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
WebPInitAlphaProcessingMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPMultARGBRow != NULL);
|
||||
assert(WebPMultRow != NULL);
|
||||
assert(WebPApplyAlphaMultiply != NULL);
|
||||
assert(WebPApplyAlphaMultiply4444 != NULL);
|
||||
assert(WebPDispatchAlpha != NULL);
|
||||
assert(WebPDispatchAlphaToGreen != NULL);
|
||||
assert(WebPExtractAlpha != NULL);
|
||||
assert(WebPExtractGreen != NULL);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
assert(WebPPackARGB != NULL);
|
||||
#endif
|
||||
assert(WebPPackRGB != NULL);
|
||||
assert(WebPHasAlpha8b != NULL);
|
||||
assert(WebPHasAlpha32b != NULL);
|
||||
|
||||
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
|
||||
// Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffffffff;
|
||||
int i, j, temp0;
|
||||
|
||||
@ -79,8 +79,7 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_mask != 0xff);
|
||||
}
|
||||
|
||||
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
|
||||
int inverse) {
|
||||
static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
|
||||
int x;
|
||||
const uint32_t c_00ffffff = 0x00ffffffu;
|
||||
const uint32_t c_ff000000 = 0xff000000u;
|
||||
@ -125,100 +124,14 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out) {
|
||||
int temp0, temp1, temp2, temp3, offset;
|
||||
const int rest = len & 1;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
const int step = 4;
|
||||
__asm__ volatile (
|
||||
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
|
||||
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
#endif // WORDS_BIGENDIAN
|
||||
|
||||
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, int step,
|
||||
uint32_t* out) {
|
||||
int temp0, temp1, temp2, offset;
|
||||
const int rest = len & 1;
|
||||
const int a = 0xff;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
__asm__ volatile (
|
||||
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[offset]"=&r"(offset), [out]"+&r"(out)
|
||||
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void WebPInitAlphaProcessingMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
|
||||
WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
|
||||
WebPMultARGBRow = MultARGBRow_MIPSdspR2;
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
WebPPackARGB = PackARGB_MIPSdspR2;
|
||||
#endif
|
||||
WebPPackRGB = PackRGB_MIPSdspR2;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPMultARGBRow = MultARGBRow;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include "src/dsp/neon.h"
|
||||
#include "./neon.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
|
||||
static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@ -107,7 +107,6 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
dst += dst_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask != 0xffffffffu);
|
||||
@ -135,7 +134,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@ -157,7 +156,6 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
|
||||
alpha += alpha_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask == 0xffffffffu);
|
||||
|
@ -11,16 +11,16 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -72,9 +72,9 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_and != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
@ -98,9 +98,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -210,61 +210,6 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Alpha detection
|
||||
|
||||
static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
for (; i + 16 <= length; i += 16) {
|
||||
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i < length; ++i) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i alpha_mask = _mm_set1_epi32(0xff);
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
// We don't know if we can access the last 3 bytes after the last alpha
|
||||
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
|
||||
// or the last byte of the quadruplet). Hence the '-3' protection below.
|
||||
length = length * 4 - 3; // size in bytes
|
||||
for (; i + 64 <= length; i += 64) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i b2 = _mm_and_si128(a2, alpha_mask);
|
||||
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
|
||||
const __m128i c0 = _mm_packs_epi32(b0, b1);
|
||||
const __m128i c1 = _mm_packs_epi32(b2, b3);
|
||||
const __m128i d = _mm_packus_epi16(c0, c1);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i + 32 <= length; i += 32) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i c = _mm_packs_epi32(b0, b1);
|
||||
const __m128i d = _mm_packus_epi16(c, c);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Apply alpha value to rows
|
||||
|
||||
@ -293,7 +238,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
|
||||
if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
|
||||
}
|
||||
|
||||
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
@ -316,7 +261,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
|
||||
if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -328,12 +273,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
|
||||
WebPMultARGBRow = MultARGBRow_SSE2;
|
||||
WebPMultRow = MultRow_SSE2;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
|
||||
WebPDispatchAlpha = DispatchAlpha_SSE2;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE2;
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_SSE2;
|
||||
WebPHasAlpha32b = HasAlpha32b_SSE2;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
@ -19,9 +19,9 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -82,7 +82,7 @@ static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
|
||||
WebPExtractAlpha = ExtractAlpha_SSE41;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
68
src/dsp/argb.c
Normal file
68
src/dsp/argb.c
Normal file
@ -0,0 +1,68 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions.
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
|
||||
// Combines four channel values into one 32-bit word laid out as
// alpha (bits 24..31), red (16..23), green (8..15), blue (0..7).
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
  const uint32_t alpha_bits = (uint32_t)a << 24;
  const int color_bits = (r << 16) | (g << 8) | b;
  return alpha_bits | color_bits;
}
|
||||
|
||||
// Plain-C packer: builds 'len' ARGB words in 'out' from four channel
// pointers. Every channel is sampled with a fixed stride of 4 bytes per pixel.
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
                     const uint8_t* b, int len, uint32_t* out) {
  int n = 0;
  while (n < len) {
    const int pos = 4 * n;  // byte position of pixel 'n' in each channel
    out[n] = MakeARGB32(a[pos], r[pos], g[pos], b[pos]);
    ++n;
  }
}
|
||||
|
||||
// Plain-C packer for opaque pixels: builds 'len' ARGB words in 'out' with the
// alpha byte forced to 0xff. Consecutive samples of each channel are 'step'
// bytes apart.
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
                    int len, int step, uint32_t* out) {
  int n, pos;
  for (n = 0, pos = 0; n < len; ++n, pos += step) {
    out[n] = MakeARGB32(0xff, r[pos], g[pos], b[pos]);
  }
}
|
||||
|
||||
void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||
const uint8_t*, int, uint32_t*);
|
||||
void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||
int, int, uint32_t*);
|
||||
|
||||
extern void VP8EncDspARGBInitMIPSdspR2(void);
|
||||
extern void VP8EncDspARGBInitSSE2(void);
|
||||
|
||||
static volatile VP8CPUInfo argb_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&argb_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
|
||||
if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
VP8PackARGB = PackARGB;
|
||||
VP8PackRGB = PackRGB;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8EncDspARGBInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8EncDspARGBInitMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
argb_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
110
src/dsp/argb_mips_dsp_r2.c
Normal file
110
src/dsp/argb_mips_dsp_r2.c
Normal file
@ -0,0 +1,110 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions (mips version).
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
// MIPS DSP r2 inline-assembly version of PackARGB: writes 'len' 32-bit words
// to 'out', each built from one byte of 'a', 'r', 'g' and 'b'. The byte offset
// into every channel advances by 'step' (fixed to 4 here) per pixel.
// The main loop (label 2) handles one pixel per iteration until 'out' reaches
// 'loop_end' (= out + len - rest); if 'rest' (= len & 1) is non-zero, one more
// pixel is packed after label 0.
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
int temp0, temp1, temp2, temp3, offset;
|
||||
const int rest = len & 1;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
const int step = 4;
|
||||
__asm__ volatile (
|
||||
// offset = 0; skip the main loop entirely when there is nothing to do in it.
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
// Load one byte per channel at the current offset.
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
// Pair the channels as halfwords: temp1 = a:r, temp3 = g:b.
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
// NOTE(review): precr.qb.ph should reduce the four halfwords to bytes,
// giving a,r,g,b from most- to least-significant byte -- confirm against
// the MIPS DSP ASE manual.
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
// 'out' was already advanced, hence the -4 displacement.
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
// Tail: pack one extra pixel when 'rest' is non-zero.
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
// Outputs: scratch registers, the running offset and the advancing 'out'.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
|
||||
// Inputs: channel base pointers, stride, loop bound and tail flag.
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
// The asm stores through 'out', so memory is declared clobbered.
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// MIPS DSP r2 inline-assembly version of PackRGB: writes 'len' 32-bit words
// to 'out', each built from one byte of 'r', 'g' and 'b' plus the constant
// 'a' (0xff). The byte offset into every channel advances by 'step' per pixel.
// The main loop (label 2) handles one pixel per iteration until 'out' reaches
// 'loop_end' (= out + len - rest); if 'rest' (= len & 1) is non-zero, one more
// pixel is packed after label 0.
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int temp0, temp1, temp2, offset;
|
||||
const int rest = len & 1;
|
||||
const int a = 0xff;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
__asm__ volatile (
|
||||
// offset = 0; skip the main loop entirely when there is nothing to do in it.
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
// Load one byte per color channel at the current offset.
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
// Pair the channels as halfwords: temp0 = 0xff:r, temp2 = g:b.
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
// NOTE(review): precr.qb.ph should reduce the four halfwords to bytes,
// giving 0xff,r,g,b from most- to least-significant byte -- confirm against
// the MIPS DSP ASE manual.
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
// 'out' was already advanced, hence the -4 displacement.
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
// Tail: pack one extra pixel when 'rest' is non-zero.
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
// Outputs: scratch registers, the running offset and the advancing 'out'.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[offset]"=&r"(offset), [out]"+&r"(out)
|
||||
// Inputs: constant alpha, channel base pointers, stride, loop bound, tail flag.
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
// The asm stores through 'out', so memory is declared clobbered.
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8EncDspARGBInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
|
||||
VP8PackARGB = PackARGB;
|
||||
VP8PackRGB = PackRGB;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
|
||||
|
||||
#endif // WEBP_USE_MIPS_DSP_R2
|
53
src/dsp/argb_sse2.c
Normal file
53
src/dsp/argb_sse2.c
Normal file
@ -0,0 +1,53 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions (SSE2 version).
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "./lossless.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
// Packs 'len' pixels into 'out' when the four channel pointers alias one
// interleaved 4-bytes-per-pixel buffer. The byte order of that buffer is
// deduced from the relative positions of the pointers; the asserts document
// the only two layouts that are supported.
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
                     const uint8_t* b, int len, uint32_t* out) {
  const int red_comes_first = (g == r + 1);
  (void)a;  // only referenced by the asserts below
  if (!red_comes_first) {
    // Blue is the lowest-addressed byte: the buffer is copied verbatim.
    assert(g == b + 1);
    assert(r == b + 2);
    assert(a == b + 3);
    memcpy(out, b, len * 4);
    return;
  }
  // RGBA input order. Need to swap R and B.
  assert(b == r + 2);
  assert(a == r + 3);
  VP8LConvertBGRAToRGBA((const uint32_t*)r, len, (uint8_t*)out);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8EncDspARGBInitSSE2(void);
|
||||
extern void VP8LDspInitSSE2(void);
|
||||
|
||||
// Installs the SSE2 PackARGB from this file. VP8PackRGB is left untouched.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
|
||||
// NOTE(review): presumably needed so that VP8LConvertBGRAToRGBA, which
// PackARGB calls, is set up before first use -- confirm against lossless.h.
VP8LDspInitSSE2();
|
||||
VP8PackARGB = PackARGB;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
@ -9,8 +9,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "./dsp.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Boolean-cost cost table
|
||||
@ -319,7 +319,7 @@ const uint8_t VP8EncBands[16 + 1] = {
|
||||
//------------------------------------------------------------------------------
|
||||
// Mode costs
|
||||
|
||||
static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
int n = res->first;
|
||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||
const int p0 = res->prob[n][ctx0][0];
|
||||
@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
@ -384,8 +384,8 @@ static volatile VP8CPUInfo cost_last_cpuinfo_used =
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
|
||||
if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
VP8GetResidualCost = GetResidualCost_C;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_C;
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
|
@ -9,13 +9,13 @@
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
|
||||
static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
int temp0, temp1;
|
||||
int v_reg, ctx_reg;
|
||||
int n = res->first;
|
||||
@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
@ -143,8 +143,8 @@ static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
extern void VP8EncDspCostInitMIPS32(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
|
||||
VP8GetResidualCost = GetResidualCost_MIPS32;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS32
|
||||
|
@ -9,13 +9,13 @@
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
|
||||
static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
int temp0, temp1;
|
||||
int v_reg, ctx_reg;
|
||||
int n = res->first;
|
||||
@ -97,7 +97,7 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
|
||||
extern void VP8EncDspCostInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
|
||||
VP8GetResidualCost = GetResidualCost_MIPSdspR2;
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,19 +11,19 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
@ -42,7 +42,7 @@ static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
res->coeffs = coeffs;
|
||||
}
|
||||
|
||||
static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
||||
uint8_t levels[16], ctxs[16];
|
||||
uint16_t abs_levels[16];
|
||||
int n = res->first;
|
||||
@ -108,8 +108,8 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
|
||||
extern void VP8EncDspCostInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
|
||||
VP8GetResidualCost = GetResidualCost_SSE2;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
|
||||
VP8GetResidualCost = GetResidualCostSSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_HAVE_NEON_RTCD)
|
||||
#include <stdio.h>
|
||||
@ -23,11 +23,13 @@
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// SSE2 detection.
|
||||
// x86/x86-64 micro-arch detection.
|
||||
//
|
||||
|
||||
// skip x86 specific code for WASM builds
|
||||
#if defined(WEBP_USE_WASM)
|
||||
// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
|
||||
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
|
||||
#elif (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
|
||||
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
|
||||
__asm__ volatile (
|
||||
"mov %%ebx, %%edi\n"
|
||||
@ -63,8 +65,10 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
|
||||
#define GetCPUInfo __cpuid
|
||||
#endif
|
||||
|
||||
// skip xgetbv definition for WASM builds
|
||||
#if defined(WEBP_USE_WASM)
|
||||
// NaCl has no support for xgetbv or the raw opcode.
|
||||
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
|
||||
#elif !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
|
||||
static WEBP_INLINE uint64_t xgetbv(void) {
|
||||
const uint32_t ecx = 0;
|
||||
uint32_t eax, edx;
|
||||
@ -94,7 +98,19 @@ static WEBP_INLINE uint64_t xgetbv(void) {
|
||||
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
|
||||
//------------------------------------------------------------------------------
|
||||
// Platform specific VP8CPUInfo functions.
|
||||
//
|
||||
|
||||
// WASM needs to precede platform specific architecture checks as the defines
|
||||
// will still be present when building this target.
|
||||
#if defined(WEBP_USE_WASM)
|
||||
static int wasmCPUInfo(CPUFeature feature) {
|
||||
if (feature != kWASM) return 0;
|
||||
return 1;
|
||||
}
|
||||
VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
|
||||
#elif defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
|
||||
|
||||
// helper function for run-time detection of slow SSSE3 platforms
|
||||
static int CheckSlowModel(int info) {
|
||||
@ -143,7 +159,7 @@ static int x86CPUInfo(CPUFeature feature) {
|
||||
return !!(cpu_info[2] & (1 << 0));
|
||||
}
|
||||
if (feature == kSlowSSSE3) {
|
||||
if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
|
||||
if (is_intel && (cpu_info[2] & (1 << 0))) { // SSSE3?
|
||||
return CheckSlowModel(cpu_info[0]);
|
||||
}
|
||||
return 0;
|
||||
|
397
src/dsp/dec.c
397
src/dsp/dec.c
@ -11,11 +11,9 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./dsp.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -27,7 +25,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
|
||||
dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
|
||||
|
||||
#define STORE2(y, dc, d, c) do { \
|
||||
const int DC = (dc); \
|
||||
@ -40,8 +38,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
|
||||
#define MUL2(a) (((a) * 35468) >> 16)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -81,7 +78,7 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = MUL2(in[4]);
|
||||
const int d4 = MUL1(in[4]);
|
||||
@ -96,21 +93,19 @@ static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
#undef MUL2
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
TransformOne(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV(const int16_t* in, uint8_t* dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -119,9 +114,8 @@ static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV(const int16_t* in, uint8_t* dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -133,8 +127,7 @@ static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Paragraph 14.3
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -160,7 +153,6 @@ static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
out += 64;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
@ -169,7 +161,6 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
#define DST(x, y) dst[(x) + (y) * BPS]
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t* const clip0 = VP8kclip1 - top[-1];
|
||||
@ -183,21 +174,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// 16x16
|
||||
|
||||
static void VE16_C(uint8_t* dst) { // vertical
|
||||
static void VE16(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 16);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16_C(uint8_t* dst) { // horizontal
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
memset(dst, dst[-1], 16);
|
||||
@ -212,7 +203,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16_C(uint8_t* dst) { // DC
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
int DC = 16;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -221,7 +212,7 @@ static void DC16_C(uint8_t* dst) { // DC
|
||||
Put16(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -230,7 +221,7 @@ static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
int DC = 8;
|
||||
int i;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
@ -239,10 +230,9 @@ static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
|
||||
@ -252,8 +242,7 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE4_C(uint8_t* dst) { // vertical
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
@ -266,9 +255,8 @@ static void VE4_C(uint8_t* dst) { // vertical
|
||||
memcpy(dst + i * BPS, vals, sizeof(vals));
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HE4_C(uint8_t* dst) { // horizontal
|
||||
static void HE4(uint8_t* dst) { // horizontal
|
||||
const int A = dst[-1 - BPS];
|
||||
const int B = dst[-1];
|
||||
const int C = dst[-1 + BPS];
|
||||
@ -280,8 +268,7 @@ static void HE4_C(uint8_t* dst) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void DC4_C(uint8_t* dst) { // DC
|
||||
static void DC4(uint8_t* dst) { // DC
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
|
||||
@ -289,7 +276,7 @@ static void DC4_C(uint8_t* dst) { // DC
|
||||
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
|
||||
}
|
||||
|
||||
static void RD4_C(uint8_t* dst) { // Down-right
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -308,7 +295,7 @@ static void RD4_C(uint8_t* dst) { // Down-right
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4_C(uint8_t* dst) { // Down-Left
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@ -325,9 +312,8 @@ static void LD4_C(uint8_t* dst) { // Down-Left
|
||||
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void VR4_C(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -349,7 +335,7 @@ static void VR4_C(uint8_t* dst) { // Vertical-Right
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4_C(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@ -371,7 +357,7 @@ static void VL4_C(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4_C(uint8_t* dst) { // Horizontal-Up
|
||||
static void HU4(uint8_t* dst) { // Horizontal-Up
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -386,7 +372,7 @@ static void HU4_C(uint8_t* dst) { // Horizontal-Up
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4_C(uint8_t* dst) { // Horizontal-Down
|
||||
static void HD4(uint8_t* dst) { // Horizontal-Down
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -418,15 +404,14 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE8uv_C(uint8_t* dst) { // vertical
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv_C(uint8_t* dst) { // horizontal
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memset(dst, dst[-1], 8);
|
||||
@ -442,7 +427,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv_C(uint8_t* dst) { // DC
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
int dc0 = 8;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -451,7 +436,7 @@ static void DC8uv_C(uint8_t* dst) { // DC
|
||||
Put8x8uv(dc0 >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -460,7 +445,7 @@ static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -469,19 +454,17 @@ static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Edge filtering functions
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
// 4 pixels in, 2 pixels out
|
||||
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
|
||||
@ -491,7 +474,7 @@ static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 4 pixels in, 4 pixels out
|
||||
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0);
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3];
|
||||
@ -504,7 +487,7 @@ static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 6 pixels in, 6 pixels out
|
||||
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
|
||||
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
|
||||
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
|
||||
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
|
||||
@ -520,22 +503,18 @@ static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
|
||||
p[ 2*step] = VP8kclip1[q2 - a3];
|
||||
}
|
||||
|
||||
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
|
||||
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
|
||||
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
|
||||
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
static WEBP_INLINE int needs_filter2(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
|
||||
const int p0 = p[-step], q0 = p[0];
|
||||
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
|
||||
@ -544,159 +523,140 @@ static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
|
||||
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
|
||||
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (NeedsFilter_C(p + i, stride, thresh2)) {
|
||||
DoFilter2_C(p + i, stride);
|
||||
if (needs_filter(p + i, stride, thresh2)) {
|
||||
do_filter2(p + i, stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
|
||||
DoFilter2_C(p + i * stride, 1);
|
||||
if (needs_filter(p + i * stride, 1, thresh2)) {
|
||||
do_filter2(p + i * stride, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16_C(p, stride, thresh);
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16_C(p, stride, thresh);
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
static WEBP_INLINE void FilterLoop26(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
} else {
|
||||
DoFilter6_C(p, hstride);
|
||||
do_filter6(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
static WEBP_INLINE void FilterLoop24(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
} else {
|
||||
DoFilter4_C(p, hstride);
|
||||
do_filter4(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// on macroblock edges
|
||||
static void VFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -740,6 +700,7 @@ extern void VP8DspInitNEON(void);
|
||||
extern void VP8DspInitMIPS32(void);
|
||||
extern void VP8DspInitMIPSdspR2(void);
|
||||
extern void VP8DspInitMSA(void);
|
||||
extern void VP8DspInitWASM(void);
|
||||
|
||||
static volatile VP8CPUInfo dec_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&dec_last_cpuinfo_used;
|
||||
@ -749,66 +710,54 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
|
||||
VP8InitClipTables();
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8TransformWHT = TransformWHT_C;
|
||||
VP8Transform = TransformTwo_C;
|
||||
VP8TransformDC = TransformDC_C;
|
||||
VP8TransformAC3 = TransformAC3_C;
|
||||
#endif
|
||||
VP8TransformUV = TransformUV_C;
|
||||
VP8TransformDCUV = TransformDCUV_C;
|
||||
VP8TransformWHT = TransformWHT;
|
||||
VP8Transform = TransformTwo;
|
||||
VP8TransformUV = TransformUV;
|
||||
VP8TransformDC = TransformDC;
|
||||
VP8TransformDCUV = TransformDCUV;
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8VFilter16 = VFilter16_C;
|
||||
VP8VFilter16i = VFilter16i_C;
|
||||
VP8HFilter16 = HFilter16_C;
|
||||
VP8VFilter8 = VFilter8_C;
|
||||
VP8VFilter8i = VFilter8i_C;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_C;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_C;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_C;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_C;
|
||||
#endif
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8HFilter16i = HFilter16i_C;
|
||||
VP8HFilter8 = HFilter8_C;
|
||||
VP8HFilter8i = HFilter8i_C;
|
||||
#endif
|
||||
VP8PredLuma4[0] = DC4;
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[3] = HE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
VP8PredLuma4[8] = HD4;
|
||||
VP8PredLuma4[9] = HU4;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma4[0] = DC4_C;
|
||||
VP8PredLuma4[1] = TM4_C;
|
||||
VP8PredLuma4[2] = VE4_C;
|
||||
VP8PredLuma4[4] = RD4_C;
|
||||
VP8PredLuma4[6] = LD4_C;
|
||||
#endif
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
|
||||
VP8PredLuma4[3] = HE4_C;
|
||||
VP8PredLuma4[5] = VR4_C;
|
||||
VP8PredLuma4[7] = VL4_C;
|
||||
VP8PredLuma4[8] = HD4_C;
|
||||
VP8PredLuma4[9] = HU4_C;
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma16[0] = DC16_C;
|
||||
VP8PredLuma16[1] = TM16_C;
|
||||
VP8PredLuma16[2] = VE16_C;
|
||||
VP8PredLuma16[3] = HE16_C;
|
||||
VP8PredLuma16[4] = DC16NoTop_C;
|
||||
VP8PredLuma16[5] = DC16NoLeft_C;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_C;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv_C;
|
||||
VP8PredChroma8[1] = TM8uv_C;
|
||||
VP8PredChroma8[2] = VE8uv_C;
|
||||
VP8PredChroma8[3] = HE8uv_C;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_C;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_C;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_C;
|
||||
#endif
|
||||
|
||||
VP8DitherCombine8x8 = DitherCombine8x8_C;
|
||||
VP8DitherCombine8x8 = DitherCombine8x8;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -822,6 +771,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
VP8DspInitMIPS32();
|
||||
@ -837,58 +791,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
VP8DspInitMSA();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#if defined(WEBP_USE_WASM)
|
||||
if (VP8GetCPUInfo(kWASM)) {
|
||||
VP8DspInitWASM();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8TransformWHT != NULL);
|
||||
assert(VP8Transform != NULL);
|
||||
assert(VP8TransformDC != NULL);
|
||||
assert(VP8TransformAC3 != NULL);
|
||||
assert(VP8TransformUV != NULL);
|
||||
assert(VP8TransformDCUV != NULL);
|
||||
assert(VP8VFilter16 != NULL);
|
||||
assert(VP8HFilter16 != NULL);
|
||||
assert(VP8VFilter8 != NULL);
|
||||
assert(VP8HFilter8 != NULL);
|
||||
assert(VP8VFilter16i != NULL);
|
||||
assert(VP8HFilter16i != NULL);
|
||||
assert(VP8VFilter8i != NULL);
|
||||
assert(VP8HFilter8i != NULL);
|
||||
assert(VP8SimpleVFilter16 != NULL);
|
||||
assert(VP8SimpleHFilter16 != NULL);
|
||||
assert(VP8SimpleVFilter16i != NULL);
|
||||
assert(VP8SimpleHFilter16i != NULL);
|
||||
assert(VP8PredLuma4[0] != NULL);
|
||||
assert(VP8PredLuma4[1] != NULL);
|
||||
assert(VP8PredLuma4[2] != NULL);
|
||||
assert(VP8PredLuma4[3] != NULL);
|
||||
assert(VP8PredLuma4[4] != NULL);
|
||||
assert(VP8PredLuma4[5] != NULL);
|
||||
assert(VP8PredLuma4[6] != NULL);
|
||||
assert(VP8PredLuma4[7] != NULL);
|
||||
assert(VP8PredLuma4[8] != NULL);
|
||||
assert(VP8PredLuma4[9] != NULL);
|
||||
assert(VP8PredLuma16[0] != NULL);
|
||||
assert(VP8PredLuma16[1] != NULL);
|
||||
assert(VP8PredLuma16[2] != NULL);
|
||||
assert(VP8PredLuma16[3] != NULL);
|
||||
assert(VP8PredLuma16[4] != NULL);
|
||||
assert(VP8PredLuma16[5] != NULL);
|
||||
assert(VP8PredLuma16[6] != NULL);
|
||||
assert(VP8PredChroma8[0] != NULL);
|
||||
assert(VP8PredChroma8[1] != NULL);
|
||||
assert(VP8PredChroma8[2] != NULL);
|
||||
assert(VP8PredChroma8[3] != NULL);
|
||||
assert(VP8PredChroma8[4] != NULL);
|
||||
assert(VP8PredChroma8[5] != NULL);
|
||||
assert(VP8PredChroma8[6] != NULL);
|
||||
assert(VP8DitherCombine8x8 != NULL);
|
||||
|
||||
}
|
||||
dec_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -11,14 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
// define to 0 to have run-time table initialization
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
|
||||
#endif
|
||||
#define USE_STATIC_TABLES // undefine to have run-time table initialization
|
||||
|
||||
#if (USE_STATIC_TABLES == 1)
|
||||
#ifdef USE_STATIC_TABLES
|
||||
|
||||
static const uint8_t abs0[255 + 255 + 1] = {
|
||||
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
@ -340,7 +337,7 @@ static uint8_t clip1[255 + 511 + 1];
|
||||
// and make sure it's set to true _last_ (so as to be thread-safe)
|
||||
static volatile int tables_ok = 0;
|
||||
|
||||
#endif // USE_STATIC_TABLES
|
||||
#endif
|
||||
|
||||
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
|
||||
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
|
||||
@ -348,7 +345,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
|
||||
const uint8_t* const VP8kabs0 = &abs0[255];
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
|
||||
#if (USE_STATIC_TABLES == 0)
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
int i;
|
||||
if (!tables_ok) {
|
||||
for (i = -255; i <= 255; ++i) {
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "./mips_macro.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "./mips_macro.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include "src/dsp/msa_macro.h"
|
||||
#include "./msa_macro.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,25 +12,23 @@
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
// cduvivier@google.com (Christian Duvivier)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
|
||||
// one it seems => disable it by default. Uncomment the following to enable:
|
||||
#if !defined(USE_TRANSFORM_AC3)
|
||||
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
// #define USE_TRANSFORM_AC3
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "./common_sse2.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -195,7 +193,7 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
}
|
||||
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
#define MUL(a, b) (((a) * (b)) >> 16)
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
@ -250,7 +248,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
_mm_subs_epu8((p), (q)))
|
||||
|
||||
// Shift each byte of "x" right by 3 bits while preserving the sign bit.
|
||||
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
|
||||
static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
|
||||
const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
|
||||
@ -260,8 +258,8 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT2(a, b) { \
|
||||
(a) = _mm_xor_si128(a, sign_bit); \
|
||||
(b) = _mm_xor_si128(b, sign_bit); \
|
||||
a = _mm_xor_si128(a, sign_bit); \
|
||||
b = _mm_xor_si128(b, sign_bit); \
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT4(a, b, c, d) { \
|
||||
@ -270,11 +268,11 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
|
||||
}
|
||||
|
||||
// input/output is uint8_t
|
||||
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i t_1 = MM_ABS(*p1, *p0);
|
||||
const __m128i t_2 = MM_ABS(*q1, *q0);
|
||||
@ -287,11 +285,11 @@ static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input pixels are int8_t
|
||||
static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
// beware of addition order, for saturation!
|
||||
const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1); // p1 - q1
|
||||
const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0); // q0 - p0
|
||||
@ -302,16 +300,15 @@ static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input and output are int8_t
|
||||
static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
|
||||
__m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
const __m128i k3 = _mm_set1_epi8(3);
|
||||
const __m128i k4 = _mm_set1_epi8(4);
|
||||
__m128i v3 = _mm_adds_epi8(*fl, k3);
|
||||
__m128i v4 = _mm_adds_epi8(*fl, k4);
|
||||
|
||||
SignedShift8b_SSE2(&v4); // v4 >> 3
|
||||
SignedShift8b_SSE2(&v3); // v3 >> 3
|
||||
SignedShift8b(&v4); // v4 >> 3
|
||||
SignedShift8b(&v3); // v3 >> 3
|
||||
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
|
||||
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
|
||||
}
|
||||
@ -320,9 +317,9 @@ static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
|
||||
// Update operations:
|
||||
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
|
||||
// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
|
||||
static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
|
||||
const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
|
||||
const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
|
||||
@ -333,11 +330,11 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
|
||||
}
|
||||
|
||||
// input pixels are uint8_t
|
||||
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
const __m128i m_thresh = _mm_set1_epi8(thresh);
|
||||
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
|
||||
const __m128i kFE = _mm_set1_epi8(0xFE);
|
||||
@ -356,29 +353,28 @@ static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
|
||||
// Edge filtering functions
|
||||
|
||||
// Applies filter on 2 pixels (p0 and q0)
|
||||
static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
__m128i a, mask;
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta)
|
||||
const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
|
||||
const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
|
||||
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &mask);
|
||||
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
|
||||
GetBaseDelta(&p1s, p0, q0, &q1s, &a);
|
||||
a = _mm_and_si128(a, mask); // mask filter values we don't care about
|
||||
DoSimpleFilter_SSE2(p0, q0, &a);
|
||||
DoSimpleFilter(p0, q0, &a);
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
}
|
||||
|
||||
// Applies filter on 4 pixels (p1, p0, q0 and q1)
|
||||
static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
const __m128i k64 = _mm_set1_epi8(64);
|
||||
@ -388,7 +384,7 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i t1, t2, t3;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
|
||||
// convert to signed values
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
@ -403,8 +399,8 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
|
||||
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
|
||||
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
|
||||
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
SignedShift8b(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
|
||||
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
@ -421,26 +417,25 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
}
|
||||
|
||||
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
|
||||
static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
__m128i a, not_hev;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
FLIP_SIGN_BIT2(*p2, *q2);
|
||||
GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
|
||||
GetBaseDelta(p1, p0, q0, q1, &a);
|
||||
|
||||
{ // do simple filter on pixels with hev
|
||||
const __m128i m = _mm_andnot_si128(not_hev, *mask);
|
||||
const __m128i f = _mm_and_si128(a, m);
|
||||
DoSimpleFilter_SSE2(p0, q0, &f);
|
||||
DoSimpleFilter(p0, q0, &f);
|
||||
}
|
||||
|
||||
{ // do strong filter on pixels with not hev
|
||||
@ -465,15 +460,15 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
|
||||
const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo); // Filter * 27 + 63
|
||||
const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi); // Filter * 27 + 63
|
||||
|
||||
Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
|
||||
Update2Pixels(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels(p0, q0, &a0_lo, &a0_hi);
|
||||
}
|
||||
}
|
||||
|
||||
// reads 8 rows across a vertical edge.
|
||||
static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
|
||||
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
|
||||
const __m128i A0 = _mm_set_epi32(
|
||||
@ -499,11 +494,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
|
||||
*q = _mm_unpackhi_epi32(C0, C1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
// Assume the pixels around the edge (|) are numbered as follows
|
||||
// 00 01 | 02 03
|
||||
// 10 11 | 12 13
|
||||
@ -519,8 +514,8 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
|
||||
// q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
|
||||
// p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
|
||||
// q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
|
||||
Load8x4_SSE2(r0, stride, p1, q0);
|
||||
Load8x4_SSE2(r8, stride, p0, q1);
|
||||
Load8x4(r0, stride, p1, q0);
|
||||
Load8x4(r8, stride, p0, q1);
|
||||
|
||||
{
|
||||
// p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
|
||||
@ -536,8 +531,7 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
|
||||
uint8_t* dst, int stride) {
|
||||
static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i, dst += stride) {
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
|
||||
@ -546,12 +540,12 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
|
||||
}
|
||||
|
||||
// Transpose back and store
|
||||
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
static WEBP_INLINE void Store16x4(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
__m128i t1, p1_s, p0_s, q0_s, q1_s;
|
||||
|
||||
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
|
||||
@ -578,55 +572,55 @@ static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
|
||||
p1_s = _mm_unpacklo_epi16(t1, q1_s);
|
||||
q1_s = _mm_unpackhi_epi16(t1, q1_s);
|
||||
|
||||
Store4x4_SSE2(&p0_s, r0, stride);
|
||||
Store4x4(&p0_s, r0, stride);
|
||||
r0 += 4 * stride;
|
||||
Store4x4_SSE2(&q0_s, r0, stride);
|
||||
Store4x4(&q0_s, r0, stride);
|
||||
|
||||
Store4x4_SSE2(&p1_s, r8, stride);
|
||||
Store4x4(&p1_s, r8, stride);
|
||||
r8 += 4 * stride;
|
||||
Store4x4_SSE2(&q1_s, r8, stride);
|
||||
Store4x4(&q1_s, r8, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
// Load
|
||||
__m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
|
||||
__m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
|
||||
__m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
|
||||
__m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
|
||||
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-stride], p0);
|
||||
_mm_storeu_si128((__m128i*)&p[0], q0);
|
||||
}
|
||||
|
||||
static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
__m128i p1, p0, q0, q1;
|
||||
|
||||
p -= 2; // beginning of p1
|
||||
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16_SSE2(p, stride, thresh);
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16_SSE2(p, stride, thresh);
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
@ -634,60 +628,60 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
#define MAX_DIFF1(p3, p2, p1, p0, m) do { \
|
||||
(m) = MM_ABS(p1, p0); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
m = MM_ABS(p1, p0); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define MAX_DIFF2(p3, p2, p1, p0, m) do { \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) { \
|
||||
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
|
||||
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
|
||||
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
|
||||
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
|
||||
e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]); \
|
||||
e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]); \
|
||||
e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]); \
|
||||
e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]); \
|
||||
}
|
||||
|
||||
#define LOADUV_H_EDGE(p, u, v, stride) do { \
|
||||
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
|
||||
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
|
||||
(p) = _mm_unpacklo_epi64(U, V); \
|
||||
p = _mm_unpacklo_epi64(U, V); \
|
||||
} while (0)
|
||||
|
||||
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) { \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * stride); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * stride); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * stride); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * stride); \
|
||||
}
|
||||
|
||||
#define STOREUV(p, u, v, stride) { \
|
||||
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
|
||||
(p) = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
|
||||
_mm_storel_epi64((__m128i*)&u[(stride)], p); \
|
||||
p = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&v[(stride)], p); \
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
static WEBP_INLINE void ComplexMask(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
const __m128i it = _mm_set1_epi8(ithresh);
|
||||
const __m128i diff = _mm_subs_epu8(*mask, it);
|
||||
const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
|
||||
__m128i filter_mask;
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
*mask = _mm_and_si128(thresh_mask, filter_mask);
|
||||
}
|
||||
|
||||
// on macroblock edges
|
||||
static void VFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i t1;
|
||||
__m128i mask;
|
||||
__m128i p2, p1, p0, q0, q1, q2;
|
||||
@ -700,8 +694,8 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
|
||||
LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
|
||||
@ -712,28 +706,28 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
|
||||
_mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
|
||||
}
|
||||
|
||||
static void HFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const b = p - 4;
|
||||
Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
|
||||
Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
|
||||
Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
@ -750,8 +744,8 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
|
||||
|
||||
// p3 and p2 are not just temporary variables here: they will be
|
||||
// re-used for next span. And q2/q3 will become p1/p0 accordingly.
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&b[0 * stride], p1);
|
||||
@ -765,12 +759,12 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
|
||||
for (k = 3; k > 0; --k) {
|
||||
__m128i mask, tmp1, tmp2;
|
||||
@ -779,13 +773,13 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
p += 4; // beginning of q0 (and next span)
|
||||
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
|
||||
// rotate samples
|
||||
p1 = tmp1;
|
||||
@ -794,8 +788,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@ -807,8 +801,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p2, u, v, -3 * stride);
|
||||
@ -819,28 +813,28 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const tu = u - 4;
|
||||
uint8_t* const tv = v - 4;
|
||||
Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
|
||||
Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
|
||||
Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
||||
@ -855,8 +849,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p1, u, v, -2 * stride);
|
||||
@ -865,24 +859,24 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
MAX_DIFF1(t2, t1, p1, p0, mask);
|
||||
|
||||
u += 4; // beginning of q0
|
||||
v += 4;
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
u -= 2; // beginning of p1
|
||||
v -= 2;
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -899,7 +893,7 @@ static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static void VE4_SSE2(uint8_t* dst) { // vertical
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -915,7 +909,7 @@ static void VE4_SSE2(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void LD4_SSE2(uint8_t* dst) { // Down-Left
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -931,7 +925,7 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
@ -956,7 +950,7 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -981,7 +975,7 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
|
||||
@ -1010,7 +1004,7 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
//------------------------------------------------------------------------------
|
||||
// Luma 16x16
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
@ -1047,11 +1041,11 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
|
||||
static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
|
||||
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
|
||||
static void VE16_SSE2(uint8_t* dst) {
|
||||
static void VE16(uint8_t* dst) {
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -1059,7 +1053,7 @@ static void VE16_SSE2(uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
@ -1068,7 +1062,7 @@ static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -1076,7 +1070,7 @@ static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16_SSE2(uint8_t* dst) { // DC
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
@ -1089,37 +1083,37 @@ static void DC16_SSE2(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 16;
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
Put16(DC >> 5, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16NoTop_SSE2(uint8_t* dst) { // DC with top samples unavailable
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
DC += dst[-1 + j * BPS];
|
||||
}
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft_SSE2(uint8_t* dst) { // DC with left samples unavailable
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
|
||||
const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft_SSE2(uint8_t* dst) { // DC with no top & left samples
|
||||
Put16_SSE2(0x80, dst);
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -1127,8 +1121,17 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
_mm_storel_epi64((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -1136,7 +1139,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv_SSE2(uint8_t* dst) { // DC
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
@ -1147,29 +1150,29 @@ static void DC8uv_SSE2(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 8;
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
Put8x8uv(DC >> 4, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 4;
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
Put8x8uv(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop_SSE2(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[-1 + i * BPS];
|
||||
}
|
||||
Put8x8uv_SSE2(dc0 >> 3, dst);
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -1178,46 +1181,47 @@ static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
|
||||
VP8Transform = Transform_SSE2;
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
VP8TransformAC3 = TransformAC3_SSE2;
|
||||
VP8Transform = Transform;
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
#endif
|
||||
|
||||
VP8VFilter16 = VFilter16_SSE2;
|
||||
VP8HFilter16 = HFilter16_SSE2;
|
||||
VP8VFilter8 = VFilter8_SSE2;
|
||||
VP8HFilter8 = HFilter8_SSE2;
|
||||
VP8VFilter16i = VFilter16i_SSE2;
|
||||
VP8HFilter16i = HFilter16i_SSE2;
|
||||
VP8VFilter8i = VFilter8i_SSE2;
|
||||
VP8HFilter8i = HFilter8i_SSE2;
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
|
||||
VP8PredLuma4[1] = TM4_SSE2;
|
||||
VP8PredLuma4[2] = VE4_SSE2;
|
||||
VP8PredLuma4[4] = RD4_SSE2;
|
||||
VP8PredLuma4[5] = VR4_SSE2;
|
||||
VP8PredLuma4[6] = LD4_SSE2;
|
||||
VP8PredLuma4[7] = VL4_SSE2;
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
|
||||
VP8PredLuma16[0] = DC16_SSE2;
|
||||
VP8PredLuma16[1] = TM16_SSE2;
|
||||
VP8PredLuma16[2] = VE16_SSE2;
|
||||
VP8PredLuma16[3] = HE16_SSE2;
|
||||
VP8PredLuma16[4] = DC16NoTop_SSE2;
|
||||
VP8PredLuma16[5] = DC16NoLeft_SSE2;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv_SSE2;
|
||||
VP8PredChroma8[1] = TM8uv_SSE2;
|
||||
VP8PredChroma8[2] = VE8uv_SSE2;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_SSE2;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
static void HE16_SSE41(uint8_t* dst) { // horizontal
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
const __m128i kShuffle3 = _mm_set1_epi8(3);
|
||||
for (j = 16; j > 0; --j) {
|
||||
@ -36,7 +36,7 @@ static void HE16_SSE41(uint8_t* dst) { // horizontal
|
||||
extern void VP8DspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
|
||||
VP8PredLuma16[3] = HE16_SSE41;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
1618
src/dsp/dec_wasm.c
Normal file
1618
src/dsp/dec_wasm.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -15,10 +15,10 @@
|
||||
#define WEBP_DSP_DSP_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#include "../webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/webp/types.h"
|
||||
#include "../webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -51,8 +51,9 @@ extern "C" {
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
// for now, none of the optimizations below are available in emscripten
|
||||
#if !defined(EMSCRIPTEN)
|
||||
// For now, none of the optimizations below are available in emscripten.
|
||||
// WebAssembly overrides native optimizations.
|
||||
#if !(defined(EMSCRIPTEN) || defined(WEBP_USE_WASM))
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
@ -104,7 +105,7 @@ extern "C" {
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
@ -116,22 +117,6 @@ extern "C" {
|
||||
|
||||
#endif /* EMSCRIPTEN */
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
@ -161,18 +146,6 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
@ -183,11 +156,12 @@ typedef enum {
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
kMSA,
|
||||
kWASM
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
@ -315,7 +289,6 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
|
||||
int xo, int yo, // center position
|
||||
int W, int H); // plane dimension
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
// This version is called with the guarantee that you can load 8 bytes and
|
||||
// 8 rows at offset src1 and src2
|
||||
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
@ -323,13 +296,10 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
|
||||
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
|
||||
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_DISABLE_STATS)
|
||||
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
|
||||
const uint8_t* src2, int len);
|
||||
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||
#endif
|
||||
|
||||
// must be called before using any of the above directly
|
||||
void VP8SSIMDspInit(void);
|
||||
@ -510,12 +480,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
@ -581,29 +551,25 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
int width, int num_rows, int inverse);
|
||||
|
||||
// Plain-C versions, used as fallback by some implementations.
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
#endif
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// This function returns true if src[i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
// This function returns true if src[4*i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
|
||||
|
||||
// To be called first before using the above.
|
||||
void WebPInitAlphaProcessing(void);
|
||||
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// To be called first before using the above.
|
||||
void VP8EncDspARGBInit(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Filter functions
|
||||
|
||||
|
158
src/dsp/enc.c
158
src/dsp/enc.c
@ -14,18 +14,16 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./dsp.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int clip_max(int v, int max) {
|
||||
return (v > max) ? max : v;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
@ -58,10 +56,9 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
histo->last_non_zero = last_non_zero;
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -79,7 +76,6 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
}
|
||||
VP8SetHistogramData(distribution, histo);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// run-time tables (~4k)
|
||||
@ -104,8 +100,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
@ -146,15 +140,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
int i;
|
||||
int tmp[16];
|
||||
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
|
||||
@ -182,16 +176,13 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
VP8FTransform(src, ref, out);
|
||||
VP8FTransform(src + 4, ref + 4, out + 16);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
// input is 12b signed
|
||||
int32_t tmp[16];
|
||||
int i;
|
||||
@ -220,7 +211,6 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
out[12 + i] = b3 >> 1;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MUL
|
||||
#undef STORE
|
||||
@ -313,8 +303,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
@ -333,8 +323,8 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_C(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
@ -517,7 +507,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -533,7 +523,6 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
int w, int h) {
|
||||
int count = 0;
|
||||
@ -549,21 +538,20 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 16);
|
||||
}
|
||||
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 8);
|
||||
}
|
||||
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 8, 8);
|
||||
}
|
||||
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 4, 4);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
int k, x, y;
|
||||
for (k = 0; k < 4; ++k) {
|
||||
uint32_t avg = 0;
|
||||
@ -583,7 +571,6 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// We try to match the spectral content (weighted) between source and
|
||||
// reconstructed samples.
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
@ -621,25 +608,24 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_C(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Quantization
|
||||
@ -650,8 +636,8 @@ static const uint8_t kZigzag[16] = {
|
||||
};
|
||||
|
||||
// Simple quantization
|
||||
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int last = -1;
|
||||
int n;
|
||||
for (n = 0; n < 16; ++n) {
|
||||
@ -676,15 +662,13 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
return (last >= 0);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Block copy
|
||||
@ -698,11 +682,11 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
}
|
||||
}
|
||||
|
||||
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 4, 4);
|
||||
}
|
||||
|
||||
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 16, 8);
|
||||
}
|
||||
|
||||
@ -750,32 +734,26 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
InitTables();
|
||||
|
||||
// default C implementations
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8ITransform = ITransform_C;
|
||||
VP8FTransform = FTransform_C;
|
||||
VP8FTransformWHT = FTransformWHT_C;
|
||||
VP8TDisto4x4 = Disto4x4_C;
|
||||
VP8TDisto16x16 = Disto16x16_C;
|
||||
VP8CollectHistogram = CollectHistogram_C;
|
||||
VP8SSE16x16 = SSE16x16_C;
|
||||
VP8SSE16x8 = SSE16x8_C;
|
||||
VP8SSE8x8 = SSE8x8_C;
|
||||
VP8SSE4x4 = SSE4x4_C;
|
||||
#endif
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8EncQuantizeBlock = QuantizeBlock_C;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_C;
|
||||
#endif
|
||||
|
||||
VP8FTransform2 = FTransform2_C;
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_C;
|
||||
VP8Mean16x4 = Mean16x4_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
VP8Copy4x4 = Copy4x4_C;
|
||||
VP8Copy16x8 = Copy16x8_C;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransform2 = FTransform2;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8Mean16x4 = Mean16x4;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock;
|
||||
VP8Copy4x4 = Copy4x4;
|
||||
VP8Copy16x8 = Copy16x8;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -794,6 +772,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
VP8EncDspInitAVX2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8EncDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
VP8EncDspInitMIPS32();
|
||||
@ -810,34 +793,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8EncDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8ITransform != NULL);
|
||||
assert(VP8FTransform != NULL);
|
||||
assert(VP8FTransformWHT != NULL);
|
||||
assert(VP8TDisto4x4 != NULL);
|
||||
assert(VP8TDisto16x16 != NULL);
|
||||
assert(VP8CollectHistogram != NULL);
|
||||
assert(VP8SSE16x16 != NULL);
|
||||
assert(VP8SSE16x8 != NULL);
|
||||
assert(VP8SSE8x8 != NULL);
|
||||
assert(VP8SSE4x4 != NULL);
|
||||
assert(VP8EncQuantizeBlock != NULL);
|
||||
assert(VP8EncQuantize2Blocks != NULL);
|
||||
assert(VP8FTransform2 != NULL);
|
||||
assert(VP8EncPredLuma4 != NULL);
|
||||
assert(VP8EncPredLuma16 != NULL);
|
||||
assert(VP8EncPredChroma8 != NULL);
|
||||
assert(VP8Mean16x4 != NULL);
|
||||
assert(VP8EncQuantizeBlockWHT != NULL);
|
||||
assert(VP8Copy4x4 != NULL);
|
||||
assert(VP8Copy16x8 != NULL);
|
||||
|
||||
enc_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
//
|
||||
// AVX2 version of speed-critical encoding functions.
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_AVX2)
|
||||
|
||||
|
@ -13,13 +13,13 @@
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
// Slobodan Prijic (slobodan.prijic@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "./mips_macro.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
@ -113,9 +113,8 @@ static const int kC2 = 35468;
|
||||
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
|
||||
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
|
||||
@ -145,11 +144,11 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne_MIPS32(ref, in, dst);
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -188,8 +187,8 @@ static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
"sh %[temp5], " #J "(%[ppin]) \n\t" \
|
||||
"sh %[level], " #N "(%[pout]) \n\t"
|
||||
|
||||
static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int sign, coeff, level, i;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -239,11 +238,11 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -362,8 +361,8 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
"msub %[temp6], %[temp0] \n\t" \
|
||||
"msub %[temp7], %[temp1] \n\t"
|
||||
|
||||
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int tmp[32];
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
|
||||
@ -397,13 +396,13 @@ static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_MIPS32(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -479,8 +478,7 @@ static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
|
||||
int temp17, temp18, temp19, temp20;
|
||||
@ -541,7 +539,7 @@ static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
|
||||
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
|
||||
|
||||
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -575,7 +573,7 @@ static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -601,7 +599,7 @@ static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -623,7 +621,7 @@ static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -653,20 +651,17 @@ static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
extern void VP8EncDspInitMIPS32(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
|
||||
VP8ITransform = ITransform_MIPS32;
|
||||
VP8FTransform = FTransform_MIPS32;
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MIPS32;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MIPS32;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_MIPS32;
|
||||
VP8TDisto16x16 = Disto16x16_MIPS32;
|
||||
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8SSE16x16 = SSE16x16_MIPS32;
|
||||
VP8SSE8x8 = SSE8x8_MIPS32;
|
||||
VP8SSE16x8 = SSE16x8_MIPS32;
|
||||
VP8SSE4x4 = SSE4x4_MIPS32;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author(s): Darko Laus (darko.laus@imgtec.com)
|
||||
// Mirko Raus (mirko.raus@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./mips_macro.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
@ -141,8 +141,7 @@ static const int kC2 = 35468;
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
const int c2217 = 2217;
|
||||
const int c5352 = 5352;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
@ -239,16 +238,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
|
||||
|
||||
@ -314,14 +313,13 @@ static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
return abs(temp3 - temp17) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_MIPSdspR2(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -1013,8 +1011,8 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8(C8DC8 + dst, left, top);
|
||||
VerticalPred8(C8VE8 + dst, top);
|
||||
@ -1033,8 +1031,8 @@ static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16(I16DC16 + dst, left, top);
|
||||
VerticalPred16(I16VE16 + dst, top);
|
||||
HorizontalPred16(I16HE16 + dst, left);
|
||||
@ -1043,7 +1041,7 @@ static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -1079,7 +1077,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
GET_SSE_INNER(C) \
|
||||
GET_SSE_INNER(D)
|
||||
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1109,7 +1107,7 @@ static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1131,7 +1129,7 @@ static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1149,7 +1147,7 @@ static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1272,8 +1270,8 @@ static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
"usw $0, " #J "(%[ppin]) \n\t" \
|
||||
"3: \n\t"
|
||||
|
||||
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
|
||||
int sign, coeff, level;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -1313,11 +1311,11 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
return (ret != 0);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -1360,7 +1358,7 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
@ -1452,9 +1450,9 @@ static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
"addiu %[temp8], %[temp8], 1 \n\t" \
|
||||
"sw %[temp8], 0(%[temp3]) \n\t"
|
||||
|
||||
static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
|
||||
@ -1486,28 +1484,23 @@ static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
|
||||
extern void VP8EncDspInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
|
||||
VP8FTransform = FTransform_MIPSdspR2;
|
||||
VP8FTransformWHT = FTransformWHT_MIPSdspR2;
|
||||
VP8ITransform = ITransform_MIPSdspR2;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_MIPSdspR2;
|
||||
VP8TDisto16x16 = Disto16x16_MIPSdspR2;
|
||||
|
||||
VP8EncPredLuma16 = Intra16Preds_MIPSdspR2;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_MIPSdspR2;
|
||||
VP8EncPredLuma4 = Intra4Preds_MIPSdspR2;
|
||||
|
||||
VP8FTransform = FTransform;
|
||||
VP8ITransform = ITransform;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8SSE16x16 = SSE16x16_MIPSdspR2;
|
||||
VP8SSE8x8 = SSE8x8_MIPSdspR2;
|
||||
VP8SSE16x8 = SSE16x8_MIPSdspR2;
|
||||
VP8SSE4x4 = SSE4x4_MIPSdspR2;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
#endif
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MIPSdspR2;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MIPSdspR2;
|
||||
|
||||
VP8CollectHistogram = CollectHistogram_MIPSdspR2;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,13 +11,13 @@
|
||||
//
|
||||
// Author: Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "src/dsp/msa_macro.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./msa_macro.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms
|
||||
@ -69,16 +69,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -131,7 +130,7 @@ static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -168,7 +167,7 @@ static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -200,20 +199,20 @@ static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform_MSA(a, w);
|
||||
const int sum2 = TTransform_MSA(b, w);
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_MSA(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -222,9 +221,9 @@ static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
//------------------------------------------------------------------------------
|
||||
// Histogram
|
||||
|
||||
static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -431,7 +430,7 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -548,8 +547,8 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -670,8 +669,8 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -712,7 +711,7 @@ static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -739,7 +738,7 @@ static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -758,7 +757,7 @@ static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -778,7 +777,7 @@ static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -800,8 +799,8 @@ static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -853,8 +852,8 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
return (sum > 0);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -867,26 +866,26 @@ static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitMSA(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMSA(void) {
|
||||
VP8ITransform = ITransform_MSA;
|
||||
VP8FTransform = FTransform_MSA;
|
||||
VP8FTransformWHT = FTransformWHT_MSA;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_MSA;
|
||||
VP8TDisto16x16 = Disto16x16_MSA;
|
||||
VP8CollectHistogram = CollectHistogram_MSA;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
|
||||
VP8EncPredLuma4 = Intra4Preds_MSA;
|
||||
VP8EncPredLuma16 = Intra16Preds_MSA;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_MSA;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
|
||||
VP8SSE16x16 = SSE16x16_MSA;
|
||||
VP8SSE16x8 = SSE16x8_MSA;
|
||||
VP8SSE8x8 = SSE8x8_MSA;
|
||||
VP8SSE4x4 = SSE4x4_MSA;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MSA;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MSA;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_MSA;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MSA
|
||||
|
@ -11,14 +11,14 @@
|
||||
//
|
||||
// adapted from libvpx (http://www.webmproject.org/code/)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "src/dsp/neon.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./neon.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
@ -37,15 +37,15 @@ static const int16_t kC2 = 17734; // half of kC2, actually. See comment above.
|
||||
#if defined(WEBP_USE_INTRINSICS)
|
||||
|
||||
// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
|
||||
static WEBP_INLINE int16x8_t ConvertU8ToS16_NEON(uint32x2_t v) {
|
||||
static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
|
||||
return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
|
||||
}
|
||||
|
||||
// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
|
||||
// to the corresponding rows of 'dst'.
|
||||
static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
const int16x8_t dst01,
|
||||
const int16x8_t dst23) {
|
||||
static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
|
||||
const int16x8_t dst01,
|
||||
const int16x8_t dst23) {
|
||||
// Unsigned saturate to 8b.
|
||||
const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
|
||||
const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
|
||||
@ -57,10 +57,8 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
|
||||
const uint8_t* const ref, uint8_t* const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -72,20 +70,19 @@ static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
|
||||
{
|
||||
// Convert to 16b.
|
||||
const int16x8_t dst01_s16 = ConvertU8ToS16_NEON(dst01);
|
||||
const int16x8_t dst23_s16 = ConvertU8ToS16_NEON(dst23);
|
||||
const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
|
||||
const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
|
||||
|
||||
// Descale with rounding.
|
||||
const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
|
||||
const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
|
||||
// Add the inverse transform.
|
||||
SaturateAndStore4x4_NEON(dst, out01, out23);
|
||||
SaturateAndStore4x4(dst, out01, out23);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
|
||||
const int16x8_t in1,
|
||||
int16x8x2_t* const out) {
|
||||
static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
|
||||
int16x8x2_t* const out) {
|
||||
// a0 a1 a2 a3 | b0 b1 b2 b3 => a0 b0 c0 d0 | a1 b1 c1 d1
|
||||
// c0 c1 c2 c3 | d0 d1 d2 d3 a2 b2 c2 d2 | a3 b3 c3 d3
|
||||
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
|
||||
@ -93,7 +90,7 @@ static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
|
||||
*out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
|
||||
// {rows} = in0 | in4
|
||||
// in8 | in12
|
||||
// B1 = in4 | in12
|
||||
@ -116,22 +113,22 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
|
||||
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
|
||||
const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
Transpose8x2(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
TransformPass_NEON(&rows);
|
||||
Add4x4_NEON(rows.val[0], rows.val[1], ref, dst);
|
||||
TransformPass(&rows);
|
||||
TransformPass(&rows);
|
||||
Add4x4(rows.val[0], rows.val[1], ref, dst);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -246,16 +243,16 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
static void ITransform(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
// Load all 4x4 pixels into a single uint8x16_t variable.
|
||||
static uint8x16_t Load4x4_NEON(const uint8_t* src) {
|
||||
static uint8x16_t Load4x4(const uint8_t* src) {
|
||||
uint32x4_t out = vdupq_n_u32(0);
|
||||
out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0);
|
||||
out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1);
|
||||
@ -268,12 +265,10 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) {
|
||||
|
||||
#if defined(WEBP_USE_INTRINSICS)
|
||||
|
||||
static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
|
||||
const int16x4_t B,
|
||||
const int16x4_t C,
|
||||
const int16x4_t D,
|
||||
int16x8_t* const out01,
|
||||
int16x8_t* const out32) {
|
||||
static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
|
||||
const int16x4_t C, const int16x4_t D,
|
||||
int16x8_t* const out01,
|
||||
int16x8_t* const out32) {
|
||||
const int16x4x2_t AB = vtrn_s16(A, B);
|
||||
const int16x4x2_t CD = vtrn_s16(C, D);
|
||||
const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
|
||||
@ -288,24 +283,24 @@ static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
|
||||
vreinterpret_s64_s32(tmp02.val[1])));
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
const uint8x8_t b) {
|
||||
static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
|
||||
const uint8x8_t b) {
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
const uint8x16_t R0 = Load4x4_NEON(ref);
|
||||
const int16x8_t D0D1 = DiffU8ToS16_NEON(vget_low_u8(S0), vget_low_u8(R0));
|
||||
const int16x8_t D2D3 = DiffU8ToS16_NEON(vget_high_u8(S0), vget_high_u8(R0));
|
||||
const uint8x16_t S0 = Load4x4(src);
|
||||
const uint8x16_t R0 = Load4x4(ref);
|
||||
const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
|
||||
const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
|
||||
const int16x4_t D0 = vget_low_s16(D0D1);
|
||||
const int16x4_t D1 = vget_high_s16(D0D1);
|
||||
const int16x4_t D2 = vget_low_s16(D2D3);
|
||||
const int16x4_t D3 = vget_high_s16(D2D3);
|
||||
Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
|
||||
Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
|
||||
}
|
||||
{ // 1rst pass
|
||||
const int32x4_t kCst937 = vdupq_n_s32(937);
|
||||
@ -323,7 +318,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
|
||||
const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
|
||||
const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
|
||||
Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
|
||||
Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
|
||||
}
|
||||
{ // 2nd pass
|
||||
// the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0)
|
||||
@ -363,8 +358,8 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -483,7 +478,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
static void FTransformWHT(const int16_t* src, int16_t* out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -521,7 +516,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
tmp0.val[3] = vsubq_s32(a0, a1);
|
||||
}
|
||||
{
|
||||
const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0);
|
||||
const int32x4x4_t tmp1 = Transpose4x4(tmp0);
|
||||
// a0 = tmp[0 + i] + tmp[ 8 + i]
|
||||
// a1 = tmp[4 + i] + tmp[12 + i]
|
||||
// a2 = tmp[4 + i] - tmp[12 + i]
|
||||
@ -565,7 +560,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
// a 26ae, b 26ae
|
||||
// a 37bf, b 37bf
|
||||
//
|
||||
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
|
||||
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
|
||||
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
|
||||
const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
|
||||
@ -579,8 +574,7 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
|
||||
return q4_in;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
|
||||
const int16x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
|
||||
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
|
||||
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
|
||||
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
|
||||
@ -599,7 +593,7 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
|
||||
return q4_out;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
|
||||
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
|
||||
q4_in.val[2]));
|
||||
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
|
||||
@ -616,7 +610,7 @@ static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
|
||||
return q4_out;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
|
||||
static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
|
||||
const uint16x8_t q_w07 = vld1q_u16(&w[0]);
|
||||
const uint16x8_t q_w8f = vld1q_u16(&w[8]);
|
||||
int16x4x4_t d4_w;
|
||||
@ -628,8 +622,8 @@ static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
|
||||
return d4_w;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
const int16x4x4_t d4_w) {
|
||||
static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
|
||||
const int16x4x4_t d4_w) {
|
||||
int32x2_t d_sum;
|
||||
// sum += w[ 0] * abs(b0);
|
||||
// sum += w[ 4] * abs(b1);
|
||||
@ -658,8 +652,8 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -685,12 +679,12 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
// Vertical pass first to avoid a transpose (vertical and horizontal passes
|
||||
// are commutative because w/kWeightY is symmetric) and subsequent
|
||||
// transpose.
|
||||
const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in);
|
||||
const int16x4x4_t d4_w = DistoLoadW_NEON(w);
|
||||
const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
|
||||
const int16x4x4_t d4_w = DistoLoadW(w);
|
||||
// horizontal pass
|
||||
const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v);
|
||||
const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t);
|
||||
int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w);
|
||||
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
|
||||
const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
|
||||
int32x2_t d_sum = DistoSum(q4_h, d4_w);
|
||||
|
||||
// abs(sum2 - sum1) >> 5
|
||||
d_sum = vabs_s32(d_sum);
|
||||
@ -700,13 +694,13 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_NEON(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -714,15 +708,15 @@ static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
int16_t out[16];
|
||||
FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
{
|
||||
int k;
|
||||
const int16x8_t a0 = vld1q_s16(out + 0);
|
||||
@ -746,9 +740,9 @@ static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -763,7 +757,7 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
}
|
||||
|
||||
// Horizontal sum of all four uint32_t values in 'sum'.
|
||||
static int SumToInt_NEON(uint32x4_t sum) {
|
||||
static int SumToInt(uint32x4_t sum) {
|
||||
const uint64x2_t sum2 = vpaddlq_u32(sum);
|
||||
const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
|
||||
return (int)sum3;
|
||||
@ -773,18 +767,18 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
|
||||
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
|
||||
}
|
||||
return SumToInt_NEON(sum);
|
||||
return SumToInt(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
|
||||
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
|
||||
}
|
||||
return SumToInt_NEON(sum);
|
||||
return SumToInt(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
@ -797,12 +791,12 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);
|
||||
sum = vpadalq_u16(sum, prod);
|
||||
}
|
||||
return SumToInt_NEON(sum);
|
||||
return SumToInt(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t a0 = Load4x4(a);
|
||||
const uint8x16_t b0 = Load4x4(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
|
||||
vget_low_u8(abs_diff));
|
||||
@ -811,7 +805,7 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
/* pair-wise adds and widen */
|
||||
const uint32x4_t sum1 = vpaddlq_u16(prod1);
|
||||
const uint32x4_t sum2 = vpaddlq_u16(prod2);
|
||||
return SumToInt_NEON(vaddq_u32(sum1, sum2));
|
||||
return SumToInt(vaddq_u32(sum1, sum2));
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -819,8 +813,8 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
static int16x8_t Quantize(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -853,10 +847,10 @@ static const uint8_t kShuffles[4][8] = {
|
||||
{ 14, 15, 22, 23, 28, 29, 30, 31 }
|
||||
};
|
||||
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const int16x8_t out0 = Quantize(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
@ -895,11 +889,11 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -911,14 +905,14 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8ITransform = ITransform_NEON;
|
||||
VP8FTransform = FTransform_NEON;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
|
||||
VP8FTransformWHT = FTransformWHT_NEON;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_NEON;
|
||||
VP8TDisto16x16 = Disto16x16_NEON;
|
||||
VP8CollectHistogram = CollectHistogram_NEON;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
|
||||
VP8SSE16x16 = SSE16x16_NEON;
|
||||
VP8SSE16x8 = SSE16x8_NEON;
|
||||
@ -926,8 +920,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8SSE4x4 = SSE4x4_NEON;
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8EncQuantizeBlock = QuantizeBlock_NEON;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -11,23 +11,23 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./common_sse2.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -193,10 +193,10 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
const __m128i* const in23,
|
||||
__m128i* const out01,
|
||||
__m128i* const out32) {
|
||||
static void FTransformPass1(const __m128i* const in01,
|
||||
const __m128i* const in23,
|
||||
__m128i* const out01,
|
||||
__m128i* const out32) {
|
||||
const __m128i k937 = _mm_set1_epi32(937);
|
||||
const __m128i k1812 = _mm_set1_epi32(1812);
|
||||
|
||||
@ -239,9 +239,8 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
*out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
|
||||
}
|
||||
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -292,8 +291,7 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -330,14 +328,13 @@ static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
__m128i v01, v32;
|
||||
|
||||
// First pass
|
||||
FTransformPass1_SSE2(&row01, &row23, &v01, &v32);
|
||||
FTransformPass1(&row01, &row23, &v01, &v32);
|
||||
|
||||
// Second pass
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
FTransformPass2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -377,15 +374,15 @@ static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
__m128i v01h, v32h;
|
||||
|
||||
// First pass
|
||||
FTransformPass1_SSE2(&shuf01l, &shuf23l, &v01l, &v32l);
|
||||
FTransformPass1_SSE2(&shuf01h, &shuf23h, &v01h, &v32h);
|
||||
FTransformPass1(&shuf01l, &shuf23l, &v01l, &v32l);
|
||||
FTransformPass1(&shuf01h, &shuf23h, &v01h, &v32h);
|
||||
|
||||
// Second pass
|
||||
FTransformPass2_SSE2(&v01l, &v32l, out + 0);
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
FTransformPass2(&v01l, &v32l, out + 0);
|
||||
FTransformPass2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -401,14 +398,14 @@ static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
FTransformWHTRow_SSE2(in + 0 * 64, &row0);
|
||||
FTransformWHTRow_SSE2(in + 1 * 64, &row1);
|
||||
FTransformWHTRow_SSE2(in + 2 * 64, &row2);
|
||||
FTransformWHTRow_SSE2(in + 3 * 64, &row3);
|
||||
FTransformWHTRow(in + 0 * 64, &row0);
|
||||
FTransformWHTRow(in + 1 * 64, &row1);
|
||||
FTransformWHTRow(in + 2 * 64, &row2);
|
||||
FTransformWHTRow(in + 3 * 64, &row3);
|
||||
|
||||
{
|
||||
// The a* are 15b signed.
|
||||
@ -434,9 +431,9 @@ static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -445,7 +442,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
int16_t out[16];
|
||||
int k;
|
||||
|
||||
FTransform_SSE2(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
|
||||
// Convert coefficients to bin (within out[]).
|
||||
{
|
||||
@ -479,7 +476,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
// Intra predictions
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -487,7 +484,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -495,20 +492,20 @@ static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
||||
if (size == 4) {
|
||||
int j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
memset(dst + j * BPS, value, 4);
|
||||
}
|
||||
} else if (size == 8) {
|
||||
Put8x8uv_SSE2(value, dst);
|
||||
Put8x8uv(value, dst);
|
||||
} else {
|
||||
Put16_SSE2(value, dst);
|
||||
Put16(value, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -516,7 +513,7 @@ static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -524,20 +521,20 @@ static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv_SSE2(dst, top);
|
||||
VE8uv(dst, top);
|
||||
} else {
|
||||
VE16_SSE2(dst, top);
|
||||
VE16(dst, top);
|
||||
}
|
||||
} else {
|
||||
Fill_SSE2(dst, 127, size);
|
||||
Fill(dst, 127, size);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
@ -546,7 +543,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
@ -555,21 +552,21 @@ static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv_SSE2(dst, left);
|
||||
HE8uv(dst, left);
|
||||
} else {
|
||||
HE16_SSE2(dst, left);
|
||||
HE16(dst, left);
|
||||
}
|
||||
} else {
|
||||
Fill_SSE2(dst, 129, size);
|
||||
Fill(dst, 129, size);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -596,13 +593,13 @@ static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM_SSE2(dst, left, top, size);
|
||||
TM(dst, left, top, size);
|
||||
} else {
|
||||
HorizontalPred_SSE2(dst, left, size);
|
||||
HorizontalPred(dst, left, size);
|
||||
}
|
||||
} else {
|
||||
// true motion without left samples (hence: with default 129 value)
|
||||
@ -610,90 +607,90 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
// Note that if top samples are not available, the default value is
|
||||
// then 129, and not 127 as in the VerticalPred case.
|
||||
if (top != NULL) {
|
||||
VerticalPred_SSE2(dst, top, size);
|
||||
VerticalPred(dst, top, size);
|
||||
} else {
|
||||
Fill_SSE2(dst, 129, size);
|
||||
Fill(dst, 129, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
const int DC = VP8HorizontalAdd8b(&combined) + 8;
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
Put8x8uv(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 4;
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
Put8x8uv(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
DC8uvNoLeft(dst, left);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
DC8uv(dst, left, top);
|
||||
} else { // top, but no left
|
||||
DC8uvNoLeft_SSE2(dst, top);
|
||||
DC8uvNoLeft(dst, top);
|
||||
}
|
||||
} else if (left != NULL) { // left but no top
|
||||
DC8uvNoTop_SSE2(dst, left);
|
||||
DC8uvNoTop(dst, left);
|
||||
} else { // no top, no left, nothing.
|
||||
DC8uvNoTopLeft_SSE2(dst);
|
||||
DC8uvNoTopLeft(dst);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
VP8HorizontalAdd8b(&top_row) + VP8HorizontalAdd8b(&left_row) + 16;
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
Put16(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
DC16NoLeft(dst, left);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
|
||||
Put16(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16_SSE2(dst, left, top);
|
||||
DC16(dst, left, top);
|
||||
} else { // top, but no left
|
||||
DC16NoLeft_SSE2(dst, top);
|
||||
DC16NoLeft(dst, top);
|
||||
}
|
||||
} else if (left != NULL) { // left but no top
|
||||
DC16NoTop_SSE2(dst, left);
|
||||
DC16NoTop(dst, left);
|
||||
} else { // no top, no left, nothing.
|
||||
DC16NoTopLeft_SSE2(dst);
|
||||
DC16NoTopLeft(dst);
|
||||
}
|
||||
}
|
||||
|
||||
@ -712,8 +709,7 @@ static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -729,8 +725,7 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -742,15 +737,14 @@ static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
Fill(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -766,8 +760,8 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
static WEBP_INLINE void VR4(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -792,8 +786,8 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
static WEBP_INLINE void VL4(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -818,8 +812,7 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -835,7 +828,7 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -850,7 +843,7 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -873,7 +866,7 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -895,56 +888,55 @@ static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
HE4_SSE2(I4HE4 + dst, top);
|
||||
RD4_SSE2(I4RD4 + dst, top);
|
||||
VR4_SSE2(I4VR4 + dst, top);
|
||||
LD4_SSE2(I4LD4 + dst, top);
|
||||
VL4_SSE2(I4VL4 + dst, top);
|
||||
HD4_SSE2(I4HD4 + dst, top);
|
||||
HU4_SSE2(I4HU4 + dst, top);
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
HE4(I4HE4 + dst, top);
|
||||
RD4(I4RD4 + dst, top);
|
||||
VR4(I4VR4 + dst, top);
|
||||
LD4(I4LD4 + dst, top);
|
||||
VL4(I4VL4 + dst, top);
|
||||
HD4(I4HD4 + dst, top);
|
||||
HU4(I4HU4 + dst, top);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
|
||||
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
|
||||
DC8uvMode(C8DC8 + dst, left, top);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
HorizontalPred(C8HE8 + dst, left, 8);
|
||||
TrueMotion(C8TM8 + dst, left, top, 8);
|
||||
// V block
|
||||
dst += 8;
|
||||
if (top != NULL) top += 8;
|
||||
if (left != NULL) left += 16;
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
|
||||
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
|
||||
DC8uvMode(C8DC8 + dst, left, top);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
HorizontalPred(C8HE8 + dst, left, 8);
|
||||
TrueMotion(C8TM8 + dst, left, top, 8);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
TrueMotion_SSE2(I16TM16 + dst, left, top, 16);
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DC16Mode(I16DC16 + dst, left, top);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
TrueMotion(I16TM16 + dst, left, top, 16);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
const __m128i b,
|
||||
__m128i* const sum) {
|
||||
static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
|
||||
__m128i* const sum) {
|
||||
// take abs(a-b) in 8b
|
||||
const __m128i a_b = _mm_subs_epu8(a, b);
|
||||
const __m128i b_a = _mm_subs_epu8(b, a);
|
||||
@ -959,8 +951,8 @@ static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
int num_pairs) {
|
||||
static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
int i;
|
||||
@ -971,8 +963,8 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
|
||||
__m128i sum1, sum2;
|
||||
SubtractAndAccumulate_SSE2(a0, b0, &sum1);
|
||||
SubtractAndAccumulate_SSE2(a1, b1, &sum2);
|
||||
SubtractAndAccumulate(a0, b0, &sum1);
|
||||
SubtractAndAccumulate(a1, b1, &sum2);
|
||||
sum = _mm_add_epi32(sum, _mm_add_epi32(sum1, sum2));
|
||||
a += 2 * BPS;
|
||||
b += 2 * BPS;
|
||||
@ -981,18 +973,18 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1019,7 +1011,7 @@ static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1056,7 +1048,7 @@ static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1094,8 +1086,8 @@ static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1195,19 +1187,19 @@ static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_SSE2(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -1217,9 +1209,9 @@ static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1329,22 +1321,22 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
|
||||
}
|
||||
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -1354,24 +1346,24 @@ static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
|
||||
VP8CollectHistogram = CollectHistogram_SSE2;
|
||||
VP8EncPredLuma16 = Intra16Preds_SSE2;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_SSE2;
|
||||
VP8EncPredLuma4 = Intra4Preds_SSE2;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_SSE2;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_SSE2;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE2;
|
||||
VP8ITransform = ITransform_SSE2;
|
||||
VP8FTransform = FTransform_SSE2;
|
||||
VP8FTransform2 = FTransform2_SSE2;
|
||||
VP8FTransformWHT = FTransformWHT_SSE2;
|
||||
VP8SSE16x16 = SSE16x16_SSE2;
|
||||
VP8SSE16x8 = SSE16x8_SSE2;
|
||||
VP8SSE8x8 = SSE8x8_SSE2;
|
||||
VP8SSE4x4 = SSE4x4_SSE2;
|
||||
VP8TDisto4x4 = Disto4x4_SSE2;
|
||||
VP8TDisto16x16 = Disto16x16_SSE2;
|
||||
VP8Mean16x4 = Mean16x4_SSE2;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransform2 = FTransform2;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8Mean16x4 = Mean16x4;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,21 +11,21 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "./common_sse2.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms.
|
||||
|
||||
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -70,8 +70,8 @@ static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
|
||||
@ -168,19 +168,19 @@ static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE41(a, b, w);
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4_SSE41(a + x + y, b + x + y, w);
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -197,9 +197,9 @@ static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
|
||||
2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i out0, out8;
|
||||
@ -300,22 +300,22 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
|
||||
#undef PSHUFB_CST
|
||||
|
||||
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
|
||||
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -324,12 +324,12 @@ static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
|
||||
extern void VP8EncDspInitSSE41(void);
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
|
||||
VP8CollectHistogram = CollectHistogram_SSE41;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_SSE41;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_SSE41;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE41;
|
||||
VP8TDisto4x4 = Disto4x4_SSE41;
|
||||
VP8TDisto16x16 = Disto16x16_SSE41;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -20,17 +20,16 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
int i;
|
||||
if (inverse) {
|
||||
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
|
||||
@ -42,10 +41,10 @@ static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -57,7 +56,7 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -67,8 +66,8 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -79,10 +78,10 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -95,7 +94,7 @@ static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -106,28 +105,26 @@ static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLine_C(in, preds, out, width, inverse);
|
||||
PredictLine(in, preds, out, width, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -139,7 +136,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -150,11 +147,11 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
int w;
|
||||
// leftmost pixel: predict from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
for (w = 1; w < width; ++w) {
|
||||
const int pred = GradientPredictor_C(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
const int pred = GradientPredictor(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
out[w] = in[w] + (inverse ? pred : -pred);
|
||||
}
|
||||
++row;
|
||||
@ -163,34 +160,32 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef SANITY_CHECK
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
|
||||
filtered_data);
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t pred = (prev == NULL) ? 0 : prev[0];
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
@ -199,28 +194,26 @@ static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
uint8_t top = prev[0], top_left = top, left = top;
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
top = prev[i]; // need to read this first, in case prev==out
|
||||
left = in[i] + GradientPredictor_C(left, top, top_left);
|
||||
left = in[i] + GradientPredictor(left, top, top_left);
|
||||
top_left = top;
|
||||
out[i] = left;
|
||||
}
|
||||
@ -245,18 +238,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
|
||||
#endif
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
|
||||
WebPFilters[WEBP_FILTER_NONE] = NULL;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
|
||||
#endif
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@ -264,6 +253,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
VP8FiltersInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8FiltersInitMIPSdspR2();
|
||||
@ -275,20 +269,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
|
||||
filters_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
|
||||
// Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -101,8 +101,8 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
int length) {
|
||||
static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
|
||||
@ -192,11 +192,10 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -208,7 +207,7 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -220,11 +219,9 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -240,11 +237,9 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -257,7 +252,7 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -271,16 +266,15 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
int temp0;
|
||||
__asm__ volatile (
|
||||
"addu %[temp0], %[a], %[b] \n\t"
|
||||
@ -299,9 +293,9 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
int w; \
|
||||
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
|
||||
for (w = 1; w < width; ++w) { \
|
||||
const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1], \
|
||||
PREDS[w - stride], \
|
||||
PREDS[w - stride - 1]); \
|
||||
const int pred = GradientPredictor(PREDS[w - 1], \
|
||||
PREDS[w - stride], \
|
||||
PREDS[w - stride - 1]); \
|
||||
out[w] = in[w] OPERATION pred; \
|
||||
} \
|
||||
++row; \
|
||||
@ -310,9 +304,9 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -324,7 +318,7 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -336,39 +330,38 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
|
||||
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
|
||||
}
|
||||
|
||||
static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
uint8_t top = prev[0], top_left = top, left = top;
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
top = prev[i]; // need to read this first, in case prev==dst
|
||||
left = in[i] + GradientPredictor_MIPSdspR2(left, top, top_left);
|
||||
left = in[i] + GradientPredictor(left, top, top_left);
|
||||
top_left = top;
|
||||
out[i] = left;
|
||||
}
|
||||
@ -386,13 +379,13 @@ static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
extern void VP8FiltersInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_MIPSdspR2;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_MIPSdspR2;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_MIPSdspR2;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MIPSdspR2;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MIPSdspR2;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MIPSdspR2;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include "src/dsp/msa_macro.h"
|
||||
#include "./msa_macro.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
@ -66,8 +66,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horrizontal filter
|
||||
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -129,8 +129,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -157,8 +157,8 @@ static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -190,9 +190,9 @@ static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
extern void VP8FiltersInitMSA(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MSA;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MSA;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MSA;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MSA
|
||||
|
@ -11,12 +11,12 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <assert.h>
|
||||
#include "src/dsp/neon.h"
|
||||
#include "./neon.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Helpful macros.
|
||||
@ -134,7 +134,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
@ -196,7 +196,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
@ -251,11 +251,9 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
|
||||
// GradientUnfilter_NEON is correct but slower than the C-version,
|
||||
// at least on ARM64. For armv7, it's a wash.
|
||||
// So best is to disable it for now, but keep the idea around...
|
||||
#if !defined(USE_GRADIENT_UNFILTER)
|
||||
#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
// #define USE_GRADIENT_UNFILTER
|
||||
|
||||
#if (USE_GRADIENT_UNFILTER == 1)
|
||||
#if defined(USE_GRADIENT_UNFILTER)
|
||||
#define GRAD_PROCESS_LANE(L) do { \
|
||||
const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
|
||||
const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \
|
||||
@ -294,7 +292,7 @@ static void GradientPredictInverse_NEON(const uint8_t* const in,
|
||||
#undef GRAD_PROCESS_LANE
|
||||
|
||||
static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_NEON(NULL, in, out, width);
|
||||
} else {
|
||||
@ -313,7 +311,7 @@ extern void VP8FiltersInitNEON(void);
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
|
||||
#if (USE_GRADIENT_UNFILTER == 1)
|
||||
#if defined(USE_GRADIENT_UNFILTER)
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
|
||||
#endif
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
@ -24,16 +24,16 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -51,7 +51,7 @@ static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -71,11 +71,10 @@ static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -85,7 +84,7 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -95,7 +94,7 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -105,10 +104,9 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -119,7 +117,7 @@ static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -127,7 +125,7 @@ static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLineTop_SSE2(in, in - stride, out, width);
|
||||
PredictLineTop(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -137,14 +135,14 @@ static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
static void GradientPredictDirect(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -163,14 +161,14 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
_mm_storel_epi64((__m128i*)(out + i), H);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -180,7 +178,7 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -189,7 +187,7 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
out[0] = in[0] - in[-stride];
|
||||
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -200,27 +198,26 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Inverse transforms
|
||||
|
||||
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
int i;
|
||||
__m128i last;
|
||||
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
|
||||
@ -241,10 +238,10 @@ static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
|
||||
}
|
||||
|
||||
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
const int max_pos = width & ~31;
|
||||
@ -263,9 +260,9 @@ static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientPredictInverse_SSE2(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
static void GradientPredictInverse(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
if (length > 0) {
|
||||
int i;
|
||||
const int max_pos = length & ~7;
|
||||
@ -296,18 +293,18 @@ static void GradientPredictInverse_SSE2(const uint8_t* const in,
|
||||
_mm_storel_epi64((__m128i*)&row[i], out);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
} else {
|
||||
out[0] = in[0] + prev[0]; // predict from above
|
||||
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
|
||||
GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -317,13 +314,13 @@ static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user