mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-15 05:19:48 +02:00
Compare commits
155 Commits
portable-i
...
v0.6.1-rc2
Author | SHA1 | Date | |
---|---|---|---|
a289d8e774 | |||
c10a493caf | |||
0d4466c2b4 | |||
1b27bf8b76 | |||
126be10950 | |||
9add62b581 | |||
d3e2614493 | |||
2edda639b2 | |||
9ca568ef82 | |||
31f1995cc5 | |||
a80c46bd87 | |||
083507f244 | |||
2e5785b233 | |||
b299c47eac | |||
f593d71a64 | |||
541179a9a5 | |||
5755a7ec53 | |||
eab5bab74f | |||
8052c585b3 | |||
c245343dcb | |||
b9e734fd5c | |||
c188d546b3 | |||
28c5ac8104 | |||
e65b72a368 | |||
b94cee98fb | |||
44a0ee3fa7 | |||
aebf59ac50 | |||
c184665ecd | |||
3daf7509c2 | |||
80285d97ad | |||
650eac5542 | |||
c462cd0065 | |||
01a98217ad | |||
3c49fc47e7 | |||
fde2782ecb | |||
2a270c1df5 | |||
f1f437cc89 | |||
3879074d99 | |||
04b029d236 | |||
b7971d0e22 | |||
6ba98764e8 | |||
5cfb3b0f6c | |||
f433205ee3 | |||
8d033b14d7 | |||
0295e9815d | |||
d572c4e52b | |||
ab9c2500db | |||
93e0ce27f4 | |||
22fbc50edd | |||
447875b47b | |||
e51bdd439c | |||
785da7eadd | |||
bc1a251fcf | |||
61e535f1ac | |||
68b2eab7df | |||
30042faa9a | |||
0a17f4712c | |||
a439972175 | |||
0827570873 | |||
d361a6a733 | |||
6921aa6f0c | |||
08c67d3ed1 | |||
582a1b572a | |||
2c1b18ba2f | |||
0ac46e818b | |||
bc634d57c2 | |||
bcb7347c2b | |||
e14ad93c0a | |||
7038ca8d52 | |||
fb3daad604 | |||
be590e0644 | |||
35f736e1ec | |||
a5216efc8c | |||
a9c8916b87 | |||
3c74c645ca | |||
c7f295d30c | |||
b4e046778d | |||
f78da3dea6 | |||
01c426f1e7 | |||
8635973dc3 | |||
e9459382b0 | |||
4a9d788e40 | |||
4fbdc9fb12 | |||
a80fcc4ae1 | |||
3993af127e | |||
f66f94ef36 | |||
6eba857b75 | |||
c5e34fba66 | |||
3822762a6c | |||
501ef6e4e9 | |||
f8bdc26821 | |||
23bfc652fe | |||
8dc3d71ba0 | |||
5bd40066cc | |||
7945575c92 | |||
8729fa1102 | |||
f324b7f9ba | |||
869eb36983 | |||
289e62a313 | |||
20a94186ce | |||
34130afe8b | |||
42c79aa66b | |||
b09307dcde | |||
bed0456d58 | |||
54f6a3cf3a | |||
088f1dcce8 | |||
86fc4dd9f4 | |||
08ea9ecde3 | |||
6f9daa4a3a | |||
a0f72a4fe0 | |||
8c934902cd | |||
622242aaba | |||
1411f02761 | |||
24ad2e3c99 | |||
46efe062b8 | |||
8c3f9a4706 | |||
1aef4c710b | |||
b8821dbd81 | |||
7beed2807b | |||
6473d20b3e | |||
dcefed950b | |||
0c83a8bc69 | |||
c6d1db4b36 | |||
663a6d9d2e | |||
73ea9f2702 | |||
c71b68ac45 | |||
c4568b47fd | |||
6cb13b0532 | |||
83a3e69a20 | |||
7295fde2e6 | |||
8e42ba4c80 | |||
331ab34bcd | |||
b161f670f8 | |||
dec5e4d330 | |||
6878d42720 | |||
461ae5551b | |||
62486a2206 | |||
92982609bc | |||
0265cede89 | |||
88c73d8a7a | |||
4ea49f6b82 | |||
1b526638b8 | |||
87f57a4b62 | |||
b34a9db1a1 | |||
471c5755fc | |||
c793417a3c | |||
dcbc1c881a | |||
66ad84f0f9 | |||
50ec3ab790 | |||
7d67a1646d | |||
e50650c77f | |||
671d2567d4 | |||
d67555809f | |||
28914528e1 | |||
8acb4942f7 |
@ -55,9 +55,6 @@ dsp_dec_srcs := \
|
||||
src/dsp/alpha_processing_neon.$(NEON) \
|
||||
src/dsp/alpha_processing_sse2.c \
|
||||
src/dsp/alpha_processing_sse41.c \
|
||||
src/dsp/argb.c \
|
||||
src/dsp/argb_mips_dsp_r2.c \
|
||||
src/dsp/argb_sse2.c \
|
||||
src/dsp/cpu.c \
|
||||
src/dsp/dec.c \
|
||||
src/dsp/dec_clip_tables.c \
|
||||
|
@ -4,17 +4,17 @@ project(libwebp C)
|
||||
|
||||
# Options for coder / decoder executables.
|
||||
option(WEBP_ENABLE_SIMD "Enable any SIMD optimization." ON)
|
||||
option(WEBP_ENABLE_WASM "Enable WebAssembly optimizations." OFF)
|
||||
option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." OFF)
|
||||
option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." OFF)
|
||||
option(WEBP_BUILD_GIF2WEBP "Build the gif2webp conversion tool." OFF)
|
||||
option(WEBP_BUILD_IMG2WEBP "Build the img2webp animation tool." OFF)
|
||||
option(WEBP_BUILD_WEBPINFO "Build the webpinfo command line tool." OFF)
|
||||
option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
|
||||
option(WEBP_ENABLE_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
|
||||
option(WEBP_EXPERIMENTAL_FEATURES "Build with experimental features." OFF)
|
||||
option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces." OFF)
|
||||
|
||||
if(WEBP_BUILD_WEBP_JS OR WEBP_ENABLE_WASM)
|
||||
if(WEBP_BUILD_WEBP_JS)
|
||||
set(WEBP_ENABLE_SIMD OFF)
|
||||
endif()
|
||||
|
||||
@ -27,19 +27,13 @@ if(NOT CMAKE_BUILD_TYPE)
|
||||
)
|
||||
endif()
|
||||
|
||||
include(cmake/config.h.cmake)
|
||||
|
||||
# Extract the version of the library.
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/configure.ac SOURCE_FILE)
|
||||
string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE})
|
||||
# Include dependencies.
|
||||
include(cmake/deps.cmake)
|
||||
|
||||
################################################################################
|
||||
# Options.
|
||||
if(WEBP_ENABLE_SWAP_16BIT_CSP)
|
||||
add_definitions(-DWEBP_SWAP_16BIT_CSP)
|
||||
endif()
|
||||
if(WEBP_ENABLE_WASM)
|
||||
add_definitions(-DWEBP_USE_WASM)
|
||||
add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
@ -54,7 +48,10 @@ if(ANDROID)
|
||||
set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS}
|
||||
${ANDROID_NDK}/sources/android/cpufeatures
|
||||
)
|
||||
add_definitions(-DHAVE_CPU_FEATURES_H)
|
||||
add_definitions(-DHAVE_CPU_FEATURES_H=1)
|
||||
set(HAVE_CPU_FEATURES_H 1)
|
||||
else()
|
||||
set(HAVE_CPU_FEATURES_H 0)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
@ -106,8 +103,13 @@ endforeach()
|
||||
|
||||
### Define the mandatory libraries.
|
||||
# Build the webpdecoder library.
|
||||
add_definitions(-Wall)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ ${WEBP_DEP_INCLUDE_DIRS})
|
||||
if(MSVC)
|
||||
# avoid security warnings for e.g., fopen() used in the examples.
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
else()
|
||||
add_definitions(-Wall)
|
||||
endif()
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${WEBP_DEP_INCLUDE_DIRS})
|
||||
add_library(webpdecode OBJECT ${WEBP_DEC_SRCS})
|
||||
add_library(webpdspdecode OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS})
|
||||
add_library(webputilsdecode OBJECT ${WEBP_UTILS_COMMON_SRCS}
|
||||
@ -145,13 +147,13 @@ function(parse_version FILE NAME VAR)
|
||||
set(${VAR} "${VERSION}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
parse_version(Makefile.am webp WEBP_WEBP_SOVERSION)
|
||||
set_target_properties(webp PROPERTIES VERSION ${WEBP_VERSION}
|
||||
set_target_properties(webp PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
SOVERSION ${WEBP_WEBP_SOVERSION})
|
||||
parse_version(Makefile.am webpdecoder WEBP_DECODER_SOVERSION)
|
||||
set_target_properties(webpdecoder PROPERTIES VERSION ${WEBP_VERSION}
|
||||
set_target_properties(webpdecoder PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
SOVERSION ${WEBP_DECODER_SOVERSION})
|
||||
parse_version(demux/Makefile.am webpdemux WEBP_DEMUX_SOVERSION)
|
||||
set_target_properties(webpdemux PROPERTIES VERSION ${WEBP_VERSION}
|
||||
set_target_properties(webpdemux PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
SOVERSION ${WEBP_DEMUX_SOVERSION})
|
||||
|
||||
# Define the libraries to install.
|
||||
@ -167,11 +169,9 @@ math(EXPR WEBP_SIMD_FILES_TO_INCLUDE_RANGE
|
||||
foreach(I_FILE RANGE ${WEBP_SIMD_FILES_TO_INCLUDE_RANGE})
|
||||
list(GET WEBP_SIMD_FILES_TO_INCLUDE ${I_FILE} FILE)
|
||||
list(GET WEBP_SIMD_FLAGS_TO_INCLUDE ${I_FILE} SIMD_COMPILE_FLAG)
|
||||
if(NOT ${SIMD_COMPILE_FLAG} STREQUAL "NOTFOUND")
|
||||
set_source_files_properties(${FILE} PROPERTIES
|
||||
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
|
||||
)
|
||||
endif()
|
||||
set_source_files_properties(${FILE} PROPERTIES
|
||||
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
|
||||
)
|
||||
endforeach()
|
||||
|
||||
# Build the executables if asked for.
|
||||
@ -200,6 +200,10 @@ if(WEBP_BUILD_CWEBP OR WEBP_BUILD_DWEBP OR
|
||||
"imageenc_[^ ]*")
|
||||
add_library(imageenc ${IMAGEENC_SRCS})
|
||||
target_link_libraries(imageenc webp)
|
||||
|
||||
set_property(TARGET exampleutil imageioutil imagedec imageenc
|
||||
PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_DWEBP)
|
||||
@ -210,6 +214,8 @@ if(WEBP_BUILD_DWEBP)
|
||||
add_executable(dwebp ${DWEBP_SRCS})
|
||||
target_link_libraries(dwebp exampleutil imagedec imageenc webpdecoder)
|
||||
install(TARGETS dwebp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET dwebp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_CWEBP)
|
||||
@ -220,6 +226,12 @@ if(WEBP_BUILD_CWEBP)
|
||||
add_executable(cwebp ${CWEBP_SRCS})
|
||||
target_link_libraries(cwebp exampleutil imagedec webp)
|
||||
install(TARGETS cwebp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET cwebp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_GIF2WEBP AND NOT GIF_FOUND)
|
||||
unset(WEBP_BUILD_GIF2WEBP CACHE)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
|
||||
@ -228,7 +240,7 @@ if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
|
||||
add_library(webpmux ${WEBP_MUX_SRCS})
|
||||
target_link_libraries(webpmux webp)
|
||||
parse_version(mux/Makefile.am webpmux WEBP_MUX_SOVERSION)
|
||||
set_target_properties(webpmux PROPERTIES VERSION ${WEBP_VERSION}
|
||||
set_target_properties(webpmux PROPERTIES VERSION ${PACKAGE_VERSION}
|
||||
SOVERSION ${WEBP_MUX_SOVERSION})
|
||||
list(APPEND INSTALLED_LIBRARIES webpmux)
|
||||
endif()
|
||||
@ -242,6 +254,8 @@ if(WEBP_BUILD_GIF2WEBP)
|
||||
target_link_libraries(gif2webp exampleutil imageioutil webp webpmux
|
||||
${WEBP_DEP_GIF_LIBRARIES})
|
||||
install(TARGETS gif2webp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET gif2webp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_IMG2WEBP)
|
||||
@ -252,6 +266,8 @@ if(WEBP_BUILD_IMG2WEBP)
|
||||
add_executable(img2webp ${IMG2WEBP_SRCS})
|
||||
target_link_libraries(img2webp exampleutil imagedec imageioutil webp webpmux)
|
||||
install(TARGETS img2webp RUNTIME DESTINATION bin)
|
||||
set_property(TARGET img2webp PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if (WEBP_BUILD_WEBPINFO)
|
||||
@ -262,6 +278,8 @@ if (WEBP_BUILD_WEBPINFO)
|
||||
add_executable(webpinfo ${WEBPINFO_SRCS})
|
||||
target_link_libraries(webpinfo exampleutil imageioutil)
|
||||
install(TARGETS webpinfo RUNTIME DESTINATION bin)
|
||||
set_property(TARGET webpinfo PROPERTY INCLUDE_DIRECTORIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_WEBP_JS)
|
||||
@ -269,6 +287,7 @@ if(WEBP_BUILD_WEBP_JS)
|
||||
add_executable(webp_js
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/extras/webp_to_sdl.c)
|
||||
target_link_libraries(webp_js webpdecoder SDL)
|
||||
set(WEBP_HAVE_SDL 1)
|
||||
set_target_properties(webp_js PROPERTIES LINK_FLAGS
|
||||
"-s EXPORTED_FUNCTIONS='[\"_WebpToSDL\"]' -s INVOKE_RUN=0")
|
||||
set_target_properties(webp_js PROPERTIES OUTPUT_NAME webp)
|
||||
@ -286,6 +305,14 @@ if(WEBP_BUILD_WEBP_JS)
|
||||
target_compile_definitions(webpdecoder PUBLIC EMSCRIPTEN)
|
||||
endif()
|
||||
|
||||
# Generate the config.h file.
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/src/webp/config.h)
|
||||
add_definitions(-DHAVE_CONFIG_H)
|
||||
# The webp folder is included as we reference config.h as
|
||||
# ../webp/config.h or webp/config.h
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# Install the different headers and libraries.
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/demux.h
|
||||
@ -302,7 +329,7 @@ install(TARGETS ${INSTALLED_LIBRARIES}
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/WebPConfigVersion.cmake"
|
||||
VERSION ${WEBP_VERSION}
|
||||
VERSION ${PACKAGE_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
|
283
ChangeLog
283
ChangeLog
@ -1,9 +1,292 @@
|
||||
c10a493c vwebp: disable double buffering on windows & mac
|
||||
0d4466c2 webp_to_sdl.c: fix file mode
|
||||
1b27bf8b WEBP_REDUCE_SIZE: disable all rescaler code
|
||||
126be109 webpinfo: add -version option
|
||||
9add62b5 bump version to 0.6.1
|
||||
d3e26144 update NEWS
|
||||
2edda639 README: add webpinfo section
|
||||
9ca568ef Merge "right-size some tables"
|
||||
31f1995c Merge "SSE2 implementation of HasAlphaXXX"
|
||||
a80c46bd SSE2 implementation of HasAlphaXXX
|
||||
083507f2 right-size some tables
|
||||
2e5785b2 anim_utils.c: remove warning when !defined(WEBP_HAVE_GIF)
|
||||
b299c47e add WEBP_REDUCE_SIZE
|
||||
f593d71a enc: disable pic->stats/extra_info w/WEBP_DISABLE_STATS
|
||||
541179a9 Merge "predictor_enc: fix build w/--disable-near-lossless"
|
||||
5755a7ec predictor_enc: fix build w/--disable-near-lossless
|
||||
eab5bab7 add WEBP_DISABLE_STATS
|
||||
8052c585 remove some petty TODOs from vwebp.
|
||||
c245343d move LOAD8x4 and STORE8x2 closer to their use location
|
||||
b9e734fd dec,cosmetics: normalize function naming style
|
||||
c188d546 dec: harmonize function suffixes
|
||||
28c5ac81 dec_sse41: harmonize function suffixes
|
||||
e65b72a3 Merge "introduce WebPHasAlpha8b and WebPHasAlpha32b"
|
||||
b94cee98 dec_sse2: remove HE8uv_SSE2
|
||||
44a0ee3f introduce WebPHasAlpha8b and WebPHasAlpha32b
|
||||
aebf59ac Merge "WebPPictureAllocARGB: align argb allocation"
|
||||
c184665e WebPPictureAllocARGB: align argb allocation
|
||||
3daf7509 WebPParseHeaders: remove obsolete animation TODO
|
||||
80285d97 cmake: avoid security warnings under msvc
|
||||
650eac55 cmake: don't set -Wall with MSVC
|
||||
c462cd00 Remove useless code.
|
||||
01a98217 Merge "remove WebPWorkerImpl declaration from the header"
|
||||
3c49fc47 Merge "thread_utils: fix potentially bad call to Execute"
|
||||
fde2782e thread_utils: fix potentially bad call to Execute
|
||||
2a270c1d remove WebPWorkerImpl declaration from the header
|
||||
f1f437cc remove mention of 'lossy-only parameters' from the doc
|
||||
3879074d Merge "WebPMemToUint32: remove ptr cast to int"
|
||||
04b029d2 WebPMemToUint32: remove ptr cast to int
|
||||
b7971d0e dsp: avoid defining _C functions w/NEON builds
|
||||
6ba98764 webpdec: correct alloc size check w/use_argb
|
||||
5cfb3b0f normalize include guards
|
||||
f433205e Merge changes Ia17c7dfc,I75423abb,Ia2f716b4,I161caa14,I4210081a, ...
|
||||
8d033b14 {dec,enc}_neon: harmonize function suffixes x2
|
||||
0295e981 upsampling_neon: harmonize function suffixes
|
||||
d572c4e5 yuv_neon: harmonize function suffixes
|
||||
ab9c2500 rescaler_neon: harmonize function suffixes
|
||||
93e0ce27 lossless_neon: harmonize function suffixes
|
||||
22fbc50e lossless_enc_neon: harmonize function suffixes
|
||||
447875b4 filters_neon,cosmetics: fix indent
|
||||
e51bdd43 remove unused VP8TokenToStats() function
|
||||
785da7ea enc_neon: harmonize function suffixes
|
||||
bc1a251f dec_neon: harmonize function suffixes
|
||||
61e535f1 dsp/lossless: workaround gcc-4.8 bug on arm
|
||||
68b2eab7 cwebp: fix alpha reporting w/lossless & metadata
|
||||
30042faa WebPDemuxGetI: add doc details around WebPFormatFeature
|
||||
0a17f471 Merge "WIP: list includes as descendants of the project dir"
|
||||
a4399721 WIP: list includes as descendants of the project dir
|
||||
08275708 Merge "Make sure we reach the full range for alpha blending."
|
||||
d361a6a7 yuv_sse2: harmonize function suffixes
|
||||
6921aa6f upsampling_sse2: harmonize function suffixes
|
||||
08c67d3e ssim_sse2: harmonize function suffixes
|
||||
582a1b57 rescaler_sse2: harmonize function suffixes
|
||||
2c1b18ba lossless_sse2: harmonize function suffixes
|
||||
0ac46e81 lossless_enc_sse2: harmonize function suffixes
|
||||
bc634d57 enc_sse2: harmonize function suffixes
|
||||
bcb7347c dec_sse2: harmonize function suffixes
|
||||
e14ad93c Make sure we reach the full range for alpha blending.
|
||||
7038ca8d demux,StoreFrame: restore hdr size check to min req
|
||||
fb3daad6 cpu: fix ssse3 check
|
||||
be590e06 Merge "Fix CMake redefinition for HAVE_CPU_FEATURES_H"
|
||||
35f736e1 Fix CMake redefinition for HAVE_CPU_FEATURES_H
|
||||
a5216efc Fix integer overflow warning.
|
||||
a9c8916b decode.h,WebPIDecGetRGB: clarify output ptr validity
|
||||
3c74c645 gif2webp: handle 1-frame case properly + fix anim_diff
|
||||
c7f295d3 Merge "gif2webp: introduce -loop_compatibility option"
|
||||
b4e04677 gif2webp: introduce -loop_compatibility option
|
||||
f78da3de add LOCAL_CLANG_PREREQ and avoid WORK_AROUND_GCC w/3.8+
|
||||
01c426f1 define WEBP_USE_INTRINSICS w/gcc-4.9+
|
||||
8635973d use sdl-config (if available) to determine the link flags
|
||||
e9459382 use CPPFLAGS before CFLAGS
|
||||
4a9d788e Merge "Android.mk,mips: fix clang build with r15"
|
||||
4fbdc9fb Android.mk,mips: fix clang build with r15
|
||||
a80fcc4a ifdef code not used by Chrome/Android.
|
||||
3993af12 Fix signed integer overflows.
|
||||
f66f94ef anim_dump: small tool to dump frames from animated WebP
|
||||
6eba857b Merge "rationalize the Makefile.am"
|
||||
c5e34fba function definition cleanup
|
||||
3822762a rationalize the Makefile.am
|
||||
501ef6e4 configure style fix: animdiff -> anim_diff
|
||||
f8bdc268 Merge "protect against NULL dump_folder[] value in ReadAnimatedImage()"
|
||||
23bfc652 protect against NULL dump_folder[] value in ReadAnimatedImage()
|
||||
8dc3d71b cosmetics,ReadAnimatedWebP: correct function comment
|
||||
5bd40066 Merge changes I66a64a0a,I4d2e520f
|
||||
7945575c cosmetics,webpinfo: remove an else after a return
|
||||
8729fa11 cosmetics,cwebp: remove an else after a return
|
||||
f324b7f9 cosmetics: normalize fn proto & decl param names
|
||||
869eb369 CMake cleanups.
|
||||
289e62a3 Remove declaration of unimplemented VP8ApplyNearLosslessPredict
|
||||
20a94186 pnmdec,PAM: validate depth before calculating bytes_per_px
|
||||
34130afe anim_encode: fix integer overflow
|
||||
42c79aa6 Merge "Encoder: harmonize function suffixes"
|
||||
b09307dc Encoder: harmonize function suffixes
|
||||
bed0456d Merge "SSIM: harmonize the function suffix"
|
||||
54f6a3cf lossless_sse2.c: fix some missed suffix changes
|
||||
088f1dcc SSIM: harmonize the function suffix
|
||||
86fc4dd9 webpdec: use ImgIoUtilCheckSizeArgumentsOverflow
|
||||
08ea9ecd imageio: add ability restrict max image size
|
||||
6f9daa4a jpegdec,ReadError: fix leaks on error
|
||||
a0f72a4f VP8LTransformColorFunc: drop an non-respected 'const' from the signature.
|
||||
8c934902 Merge "Lossess dec: harmonize the function suffixes"
|
||||
622242aa Lossess dec: harmonize the function suffixes
|
||||
1411f027 Lossless Enc: harmonize the function suffixes
|
||||
24ad2e3c add const to two variables
|
||||
46efe062 Merge "Allow the lossless cruncher to work for alpha."
|
||||
8c3f9a47 Speed-up LZ77.
|
||||
1aef4c71 Allow the lossless cruncher to work for alpha.
|
||||
b8821dbd Improve the box LZ77 speed.
|
||||
7beed280 add missing ()s to macro parameters
|
||||
6473d20b Merge "fix Android standalone toolchain build"
|
||||
dcefed95 Merge "build.gradle: fix arm64 build"
|
||||
0c83a8bc Merge "yuv: harmonize suffix naming"
|
||||
c6d1db4b fix Android standalone toolchain build
|
||||
663a6d9d unify the ALTERNATE_CODE flag usage
|
||||
73ea9f27 yuv: harmonize suffix naming
|
||||
c71b68ac build.gradle: fix arm64 build
|
||||
c4568b47 Rescaler: harmonize the suffix naming
|
||||
6cb13b05 Merge "alpha_processing: harmonize the naming suffixes to be _C()"
|
||||
83a3e69a Merge "simplify WEBP_EXTERN macro"
|
||||
7295fde2 Merge "filters: harmonize the suffixes naming to _SSE2(), _C(), etc."
|
||||
8e42ba4c simplify WEBP_EXTERN macro
|
||||
331ab34b cost*.c: harmonize the suffix namings
|
||||
b161f670 filters: harmonize the suffixes naming to _SSE2(), _C(), etc.
|
||||
dec5e4d3 alpha_processing: harmonize the naming suffixes to be _C()
|
||||
6878d427 fix memory leak in SDL_Init()
|
||||
461ae555 Merge "configure: fix warnings in sdl check"
|
||||
62486a22 configure: test for -Wundef
|
||||
92982609 dsp.h: fix -Wundef w/__mips_dsp_rev
|
||||
0265cede configure: fix warnings in sdl check
|
||||
88c73d8a backward_references_enc.h: fix WINDOW_SIZE_BITS check
|
||||
4ea49f6b rescaler_sse2.c: fix WEBP_RESCALER_FIX -> _RFIX typo
|
||||
1b526638 Clean-up some CMake
|
||||
87f57a4b Merge "cmake: fix gif lib detection when cross compiling"
|
||||
b34a9db1 cosmetics,dec_sse2: remove some redundant comments
|
||||
471c5755 cmake: fix gif lib detection when cross compiling
|
||||
c793417a cmake: disable gif2webp if gif lib isn't found
|
||||
dcbc1c88 cmake: split gif detection from IMG deps
|
||||
66ad84f0 Merge "muxread: remove unreachable code"
|
||||
50ec3ab7 muxread: remove unreachable code
|
||||
7d67a164 Lossy encoding: smoothen transparent areas to improve compression
|
||||
e50650c7 Merge "fix signature for DISABLE_TOKEN_BUFFER compilation"
|
||||
671d2567 fix signature for DISABLE_TOKEN_BUFFER compilation
|
||||
d6755580 cpu.cmake: use unique flag to test simd disable flags
|
||||
28914528 Merge "Remove the argb* files."
|
||||
8acb4942 Remove the argb* files.
|
||||
3b62347b README: correct cmake invocation note
|
||||
7ca0df13 Have the SSE2 version of PackARGB use common code.
|
||||
7b250459 Merge "Re-use the transformed image when trying several LZ77 in lossless."
|
||||
e132072f Re-use the transformed image when trying several LZ77 in lossless.
|
||||
5d7a50ef Get code to compile in C++.
|
||||
7b012987 configure: test for -Wparentheses-equality
|
||||
f0569adb Fix man pages for multi-threading.
|
||||
f1d5a397 multithread cruncher: only copy stats when picture->stats != NULL
|
||||
f8c2ac15 Multi-thread the lossless cruncher.
|
||||
a88c6522 Merge "Integrate a new LZ77 looking for matches in the neighborhood of a pixel only."
|
||||
8f6df1d0 Unroll Predictors 10, 11 and 12.
|
||||
355c3d1b Integrate a new LZ77 looking for matches in the neighborhood of a pixel only.
|
||||
a1779a01 Refactor LZ77 handling in preparation for a new method.
|
||||
67de68b5 Android.mk/build.gradle: fix mips build with clang from r14b
|
||||
f209a548 Use the plane code and not the distance when computing statistics.
|
||||
b903b80c Split cost-based backward references in its own file.
|
||||
498cad34 Cosmetic changes in backward reference.
|
||||
e4eb4587 lossless, VP8LTransformColor_C: make sure no overflow happens with colors.
|
||||
af6deaff webpinfo: handle alpha flag mismatch
|
||||
7caef29b Fix typo that creeped in.
|
||||
39e19f92 Merge "near lossless: fix unsigned int overflow warnings."
|
||||
9bbc0891 near lossless: fix unsigned int overflow warnings.
|
||||
e1118d62 Merge "cosmetics,FindClosestDiscretized: use uint in mask creation"
|
||||
186bc9b7 Merge "webpinfo: tolerate ALPH+VP8L"
|
||||
b5887297 cosmetics,FindClosestDiscretized: use uint in mask creation
|
||||
f1784aee near_lossless,FindClosestDiscretized: use unsigned ops
|
||||
0d20abb3 webpinfo: tolerate ALPH+VP8L
|
||||
972104b3 webpmux: tolerate false positive Alpha flag
|
||||
dd7e83cc tiffdec,ReadTIFF: ensure data_size is < tsize_t max
|
||||
d988eb7b tiffdec,MyRead: quiet -Wshorten-64-to-32 warning
|
||||
dabda707 webpinfo: add support to parse Alpha bitstream
|
||||
4c117643 webpinfo: correct background color output, BGRA->ARGB
|
||||
defc98d7 Doc: clarify the role of quality in WebPConfig.
|
||||
d78ff780 Merge "Fix code to compile with C++."
|
||||
c8f14093 Fix code to compile with C++.
|
||||
497dc6a7 pnmdec: sanitize invalid header output
|
||||
d78e5867 Merge "configure: test for -Wconstant-conversion"
|
||||
481e91eb Merge "pnmdec,PAM: set bytes_per_px based on depth when missing"
|
||||
93b12753 configure: test for -Wconstant-conversion
|
||||
645f0c53 pnmdec,PAM: set bytes_per_px based on depth when missing
|
||||
e9154605 Merge "vwebp: activate GLUT double-buffering"
|
||||
818d795b vwebp: activate GLUT double-buffering
|
||||
d63e6f4b Add a man page for webpinfo
|
||||
4d708435 Merge "NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV"
|
||||
faf42213 NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV
|
||||
b4d576fa Install man pages with CMake.
|
||||
cbc1b921 webpinfo: add features to parse bitstream header
|
||||
e644c556 Fix bad bit writer initialization.
|
||||
b62cdad2 Merge "Implement a cruncher for lossless at method 6."
|
||||
da3e4dfb use the exact constant for the gamma transfer function
|
||||
a9c701e0 Merge "tiffdec: fix EXTRASAMPLES check"
|
||||
adab8ce0 Implement a cruncher for lossless at method 6.
|
||||
1b92b237 Merge "Fix VP8ApplyNearLossless to respect const and stride."
|
||||
1923ff02 tiffdec: fix EXTRASAMPLES check
|
||||
97cce5ba tiffdec: only request EXTRASAMPLES w/> 3 samples/px
|
||||
0dcd85b6 Fix VP8ApplyNearLossless to respect const and stride.
|
||||
f7682189 yuv: rationalize the C/SSE2 function naming
|
||||
52245424 NEON implementation of some Sharp-YUV420 functions
|
||||
690efd82 Avoid several backward reference copies.
|
||||
4bb1f607 src/dec/vp8_dec.h, cosmetics: fix comments
|
||||
285748be cmake: build/install webpinfo
|
||||
78fd199c backward_references_enc.c: clear -Wshadow warnings
|
||||
ae836410 WebPLog2FloorC: clear -Wshadow warning
|
||||
d0b7404e Merge "WASM support"
|
||||
134e314f WASM support
|
||||
c08adb6f Merge "VP8LEnc: remove use of BitsLog2Ceiling()"
|
||||
28c37ebd VP8LEnc: remove use of BitsLog2Ceiling()
|
||||
2cb58ab2 webpinfo: output format as a human readable string
|
||||
bb175a93 Merge "rename some symbols clashing with MSVC headers"
|
||||
39eda658 Remove a duplicated pixel hash implementation.
|
||||
36b8274d rename some symbols clashing with MSVC headers
|
||||
274daf54 Add webpinfo tool.
|
||||
ec5036e4 add explicit reference to /usr/local/{lib,inc}
|
||||
18f0dfac Merge "fix TIFF encoder regarding rgbA/RGBA"
|
||||
4e2b0b50 Merge "webpdec.h: fix a doc typo"
|
||||
e2eeabff Merge "Install binaries, libraries and headers in CMake."
|
||||
836607e6 webpdec.h: fix a doc typo
|
||||
9273e441 fix TIFF encoder regarding rgbA/RGBA
|
||||
17e3c11f Add limited PAM decoding support
|
||||
5f624871 Install binaries, libraries and headers in CMake.
|
||||
976adac1 Merge "lossless incremental decoding: fix missing eos_ test"
|
||||
f8fad4fa lossless incremental decoding: fix missing eos_ test
|
||||
27415d41 Merge "vwebp_sdl: fix the makefile.unix"
|
||||
49566182 Merge "ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode"
|
||||
6f75a51b Analyze the transform entropy on the whole image.
|
||||
a5e4e3af Use palette only if we can in entropy analysis.
|
||||
75a9c3c4 Improve compression by better entropy analysis.
|
||||
39cf6f4f vwebp_sdl: fix the makefile.unix
|
||||
699b0416 ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode
|
||||
7d985bd1 Fix small entropy analysis bug.
|
||||
6e7caf06 Optimize the color cache size.
|
||||
833c9219 More efficient stochastic histogram merge.
|
||||
5183326b Refactor the greedy histogram merge.
|
||||
99f6f462 Merge "histogram_enc.c,MyRand: s/ul/u/ for unsigned constants"
|
||||
80a22186 ssim.c: remove dead include
|
||||
a128dfff histogram_enc.c,MyRand: s/ul/u/ for unsigned constants
|
||||
693bf74e move the SSIM calculation code in ssim.c / ssim_sse2.c
|
||||
10d791ca Merge "Fix the random generator in HistogramCombineStochastic."
|
||||
fa63a966 Fix the random generator in HistogramCombineStochastic.
|
||||
16be192f VP8LSetBitPos: remove the eos_ setting
|
||||
027151ca don't erase the surface before blitting.
|
||||
4105d565 disable WEBP_USE_XXX optimisations when EMSCRIPTEN is defined
|
||||
9ee32a75 Merge "WebP-JS: emscripten-based Javascript decoder"
|
||||
ca9f7b7d WebP-JS: emscripten-based Javascript decoder
|
||||
868aa690 Perform greedy histogram merge in a unified way.
|
||||
5b393f2d Merge "fix path typo for vwebp_sdl in Makefile.vc"
|
||||
e0012bea CMake: only use libwebpdecoder for building dwebp
|
||||
84c2a7b0 fix path typo for vwebp_sdl in Makefile.vc
|
||||
1b0e4abf Merge "Add a flag to disable SIMD optimizations."
|
||||
32263250 Add a flag to disable SIMD optimizations.
|
||||
b494fdec optimize the ARGB->ARGB Import to use memcpy
|
||||
f1536039 Merge "ReadWebP: decode directly into a pre-allocated buffer"
|
||||
e69ed291 ReadWebP: decode directly into a pre-allocated buffer
|
||||
57d8de8a Merge "vwebp_sdl: simple viewer based on SDL"
|
||||
5cfd4ebc LZ77 interval speedups. Faster, smaller, simpler.
|
||||
1e7ad88b PNM header decoder: add some basic numerical validation
|
||||
17c7890c Merge "Add a decoder only library for WebP in CMake."
|
||||
be733786 Merge "Add clang build fix for MSA"
|
||||
03cda0e4 Add a decoder only library for WebP in CMake.
|
||||
aa893914 Add clang build fix for MSA
|
||||
31a92e97 Merge "imageio: add limited PNM support for reading"
|
||||
dcf9d82a imageio: add limited PNM support for reading
|
||||
6524fcd6 vwebp_sdl: simple viewer based on SDL
|
||||
6cf24a24 get_disto: fix reference file read
|
||||
43d472aa Merge tag 'v0.6.0'
|
||||
50d1a848 update ChangeLog (tag: v0.6.0, origin/0.6.0, 0.6.0)
|
||||
20a7fea0 extras/Makefile.am: fix libwebpextras.la reference
|
||||
415f3ffe update ChangeLog (tag: v0.6.0-rc3)
|
||||
3c6d1224 update NEWS
|
||||
ee4a4141 update AUTHORS
|
||||
32ed856f Fix "all|no frames are keyframes" settings.
|
||||
1c3190b6 Merge "Fix "all|no frames are keyframes" settings."
|
||||
f4dc56fd disable GradientUnfilter_NEON
|
||||
4f3e3bbd disable GradientUnfilter_NEON
|
||||
2dc0bdca Fix "all|no frames are keyframes" settings.
|
||||
0d8e0588 img2webp: treat -loop as a no-op w/single images
|
||||
b0450139 ReadImage(): restore size reporting
|
||||
0ad3b4ef update ChangeLog (tag: v0.6.0-rc2)
|
||||
|
16
Makefile.vc
16
Makefile.vc
@ -29,7 +29,7 @@ PLATFORM_LDFLAGS = /SAFESEH
|
||||
NOLOGO = /nologo
|
||||
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
|
||||
CCDEBUG = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
|
||||
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c
|
||||
CFLAGS = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
|
||||
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
|
||||
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
|
||||
LDFLAGS = $(LDFLAGS) $(PLATFORM_LDFLAGS)
|
||||
@ -155,6 +155,7 @@ CFGSET = TRUE
|
||||
!MESSAGE - all - build (de)mux-based targets for CFG
|
||||
!MESSAGE - gif2webp - requires libgif & >= VS2013
|
||||
!MESSAGE - anim_diff - requires libgif & >= VS2013
|
||||
!MESSAGE - anim_dump
|
||||
!MESSAGE
|
||||
!MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
|
||||
!MESSAGE 'legacy' will produce a Windows 2000 compatible library.
|
||||
@ -233,9 +234,6 @@ DSP_DEC_OBJS = \
|
||||
$(DIROBJ)\dsp\yuv_sse2.obj \
|
||||
|
||||
DSP_ENC_OBJS = \
|
||||
$(DIROBJ)\dsp\argb.obj \
|
||||
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
|
||||
$(DIROBJ)\dsp\argb_sse2.obj \
|
||||
$(DIROBJ)\dsp\cost.obj \
|
||||
$(DIROBJ)\dsp\cost_mips32.obj \
|
||||
$(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
|
||||
@ -358,10 +356,15 @@ all: ex $(EXTRA_EXAMPLES)
|
||||
# C99 support which is only available from VS2013 onward.
|
||||
gif2webp: $(DIRBIN)\gif2webp.exe
|
||||
anim_diff: $(DIRBIN)\anim_diff.exe
|
||||
anim_dump: $(DIRBIN)\anim_dump.exe
|
||||
|
||||
$(DIRBIN)\anim_diff.exe: $(DIROBJ)\examples\anim_diff.obj $(EX_ANIM_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_diff.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_diff.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\anim_dump.exe: $(DIROBJ)\examples\anim_dump.obj $(EX_ANIM_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_dump.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\anim_dump.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\anim_dump.exe: $(IMAGEIO_ENC_OBJS)
|
||||
$(DIRBIN)\cwebp.exe: $(DIROBJ)\examples\cwebp.obj $(IMAGEIO_DEC_OBJS)
|
||||
$(DIRBIN)\cwebp.exe: $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\dwebp.exe: $(DIROBJ)\examples\dwebp.obj $(IMAGEIO_DEC_OBJS)
|
||||
@ -444,7 +447,7 @@ $(OUTPUT_DIRS):
|
||||
$(DIROBJ)\$(DLLINC):
|
||||
@echo #ifndef WEBP_DLL_H_ > $@
|
||||
@echo #define WEBP_DLL_H_ >> $@
|
||||
@echo #define WEBP_EXTERN(type) __declspec(dllexport) type >> $@
|
||||
@echo #define WEBP_EXTERN __declspec(dllexport) >> $@
|
||||
@echo #endif /* WEBP_DLL_H_ */ >> $@
|
||||
|
||||
.SUFFIXES: .c .obj .res .exe
|
||||
@ -456,6 +459,9 @@ $(DIROBJ)\dsp\enc_avx2.obj: src\dsp\enc_avx2.c
|
||||
$(DIROBJ)\examples\anim_diff.obj: examples\anim_diff.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
$(DIROBJ)\examples\anim_dump.obj: examples\anim_dump.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
$(DIROBJ)\examples\anim_util.obj: examples\anim_util.c
|
||||
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
|
||||
/Fo$(DIROBJ)\examples\ examples\$(@B).c
|
||||
|
13
NEWS
13
NEWS
@ -1,3 +1,16 @@
|
||||
- 11/24/2017: version 0.6.1
|
||||
This is a binary compatible release.
|
||||
* lossless performance and compression improvements + a new 'cruncher' mode
|
||||
(-m 6 -q 100)
|
||||
* ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
|
||||
* webp-js: emscripten/webassembly based javascript decoder
|
||||
* miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
|
||||
#363)
|
||||
Tool updates / additions:
|
||||
added webpinfo - prints file format information (issue #330)
|
||||
gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
|
||||
'-loop_compatibility' can be used for the old behavior
|
||||
|
||||
- 1/26/2017: version 0.6.0
|
||||
* lossless performance and compression improvements
|
||||
* miscellaneous performance improvements (SSE2, NEON, MSA)
|
||||
|
25
README
25
README
@ -4,7 +4,7 @@
|
||||
\__\__/\____/\_____/__/ ____ ___
|
||||
/ _/ / \ \ / _ \/ _/
|
||||
/ \_/ / / \ \ __/ \__
|
||||
\____/____/\_____/_____/____/v0.6.0
|
||||
\____/____/\_____/_____/____/v0.6.1
|
||||
|
||||
Description:
|
||||
============
|
||||
@ -113,8 +113,8 @@ make install
|
||||
|
||||
CMake:
|
||||
------
|
||||
With CMake, you can compile libwebp, cwebp, dwebp, gif2web, img2webp and the
|
||||
JS bindings.
|
||||
With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp, webpinfo
|
||||
and the JS bindings.
|
||||
|
||||
Prerequisites:
|
||||
A compiler (e.g., gcc with autotools) and CMake.
|
||||
@ -367,6 +367,23 @@ Use following options to convert into alternate image formats:
|
||||
-quiet ....... quiet mode, don't print anything
|
||||
-noasm ....... disable all assembly optimizations
|
||||
|
||||
WebP file analysis tool:
|
||||
========================
|
||||
|
||||
'webpinfo' can be used to print out the chunk level structure and bitstream
|
||||
header information of WebP files. It can also check if the files are of valid
|
||||
WebP format.
|
||||
|
||||
Usage: webpinfo [options] in_files
|
||||
Note: there could be multiple input files;
|
||||
options must come before input files.
|
||||
Options:
|
||||
-version ........... Print version number and exit.
|
||||
-quiet ............. Do not show chunk parsing information.
|
||||
-diag .............. Show parsing error diagnosis.
|
||||
-summary ........... Show chunk stats summary.
|
||||
-bitstream_info .... Parse bitstream header.
|
||||
|
||||
Visualization tool:
|
||||
===================
|
||||
|
||||
@ -477,6 +494,8 @@ Options:
|
||||
-metadata <string> ..... comma separated list of metadata to
|
||||
copy from the input to the output if present
|
||||
Valid values: all, none, icc, xmp (default)
|
||||
-loop_compatibility .... use compatibility mode for Chrome
|
||||
version prior to M62 (inclusive)
|
||||
-mt .................... use multi-threading if available
|
||||
|
||||
-version ............... print version number and exit
|
||||
|
@ -1,7 +1,7 @@
|
||||
__ __ ____ ____ ____ __ __ _ __ __
|
||||
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
|
||||
\ / __/ _ \ __/ / / (_/ /__
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.0
|
||||
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.1
|
||||
|
||||
|
||||
Description:
|
||||
|
91
README.wasm
91
README.wasm
@ -1,91 +0,0 @@
|
||||
Description:
|
||||
============
|
||||
|
||||
This file describes the compilation of libwebp using portable intrinsics /
|
||||
WebAssembly (wasm) to native targets using clang and CMake.
|
||||
|
||||
Prerequisites:
|
||||
==============
|
||||
|
||||
- cmake 2.8+
|
||||
|
||||
- clang 3.9+ for portable intrinsics support; as wasm progresses a tip of tree
|
||||
build may be necessary.
|
||||
|
||||
Building:
|
||||
=========
|
||||
|
||||
- configure the project with CMake using:
|
||||
|
||||
$ mkdir -p build && \
|
||||
cd build && \
|
||||
cmake -DWEBP_BUILD_DWEBP=1 -DCMAKE_C_COMPILER=clang -DWEBP_ENABLE_WASM=1 ../
|
||||
|
||||
- compile dwebp using 'make'.
|
||||
|
||||
- Note this currently generates native executables only and is incompatible
|
||||
with -DWEBP_BUILD_WEBP_JS.
|
||||
|
||||
Build options:
|
||||
==============
|
||||
|
||||
- platform specific multiply high (mulhi) implementation, disabled by default.
|
||||
arm: -DCMAKE_C_FLAGS='-DENABLE_NEON_BUILTIN_MULHI_INT16X8 ...'
|
||||
x86: -DCMAKE_C_FLAGS='-DENABLE_X86_BUILTIN_MULHI_INT16X8 ...'
|
||||
|
||||
Cross compilation:
|
||||
==================
|
||||
|
||||
- arm toolchains can be obtained from:
|
||||
http://www.linaro.org/downloads/
|
||||
|
||||
- the android ndk can be obtained from:
|
||||
https://developer.android.com/ndk/downloads/index.html
|
||||
|
||||
armv7:
|
||||
------
|
||||
|
||||
Android:
|
||||
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
|
||||
--arch arm --api 24 --stl gnustl --install-dir /opt/android-arm-24
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp \
|
||||
-DWEBP_BUILD_DWEBP=1 \
|
||||
-DCMAKE_C_COMPILER=/opt/android-arm-24/bin/clang \
|
||||
-DCMAKE_PREFIX_PATH=/opt/android-arm-24/sysroot/usr/lib \
|
||||
-DCMAKE_C_FLAGS=-fPIE \
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DWEBP_ENABLE_WASM=1
|
||||
|
||||
Linux:
|
||||
$ gcc_arm=/opt/gcc-arm; target=arm-linux-gnueabihf
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc -march=armv7-a -mfpu=neon" \
|
||||
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr
|
||||
|
||||
aarch64 / arm64:
|
||||
----------------
|
||||
|
||||
Android:
|
||||
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
|
||||
--arch arm64 --api 24 --stl gnustl --install-dir /opt/android-arm64-24
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp \
|
||||
-DWEBP_BUILD_DWEBP=1 \
|
||||
-DCMAKE_C_COMPILER=/opt/android-arm64-24/bin/clang \
|
||||
-DCMAKE_PREFIX_PATH=/opt/android-arm64-24/sysroot/usr/lib \
|
||||
-DCMAKE_C_FLAGS=-fPIE \
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DWEBP_ENABLE_WASM=1
|
||||
|
||||
Linux:
|
||||
$ gcc_arm=/opt/gcc-aarch64; target=aarch64-linux-gnu
|
||||
$ mkdir -p build && cd build
|
||||
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc" \
|
||||
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr
|
@ -31,11 +31,6 @@ using Emscripten and CMake.
|
||||
- that's it! Upon completion, you should have the webp.js and
|
||||
webp.js.mem files generated.
|
||||
|
||||
- Note this generates both webp_js and webp_wasm without any SIMD enabled due
|
||||
to bugs with this toolchain associated with the SSE2 code.
|
||||
-DWEBP_ENABLE_WASM is currently meant to generate native (x86, arm)
|
||||
executables (dwebp, cwebp) and is incompatible with -DWEBP_BUILD_WEBP_JS.
|
||||
|
||||
The callable JavaScript function is WebPToSDL(), which decodes a raw WebP
|
||||
bitstream into a canvas. See webp_js/index.html for a simple usage sample.
|
||||
|
||||
|
@ -82,12 +82,14 @@ model {
|
||||
}
|
||||
}
|
||||
// Check for NEON usage.
|
||||
if (getTargetPlatform() == "arm" || getTargetPlatform() == "arm64") {
|
||||
if (getTargetPlatform() == "arm") {
|
||||
NEON = "c.neon"
|
||||
cCompiler.define "HAVE_CPU_FEATURES_H"
|
||||
} else {
|
||||
NEON = "c"
|
||||
}
|
||||
|
||||
cCompiler.args "-I" + file(".").absolutePath
|
||||
}
|
||||
// Link to pthread for shared libraries.
|
||||
withType(SharedLibraryBinarySpec) {
|
||||
@ -120,9 +122,6 @@ model {
|
||||
include "alpha_processing_neon.$NEON"
|
||||
include "alpha_processing_sse2.c"
|
||||
include "alpha_processing_sse41.c"
|
||||
include "argb.c"
|
||||
include "argb_mips_dsp_r2.c"
|
||||
include "argb_sse2.c"
|
||||
include "cpu.c"
|
||||
include "dec.c"
|
||||
include "dec_clip_tables.c"
|
||||
|
@ -13,6 +13,9 @@
|
||||
/* Set to 1 if __builtin_bswap64 is available */
|
||||
#cmakedefine HAVE_BUILTIN_BSWAP64 1
|
||||
|
||||
/* Define to 1 if you have the <cpu-features.h> header file. */
|
||||
#cmakedefine HAVE_CPU_FEATURES_H 1
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#cmakedefine HAVE_DLFCN_H 1
|
||||
|
||||
@ -115,9 +118,19 @@
|
||||
/* Set to 1 if JPEG library is installed */
|
||||
#cmakedefine WEBP_HAVE_JPEG 1
|
||||
|
||||
/* Set to 1 if NEON is supported */
|
||||
#cmakedefine WEBP_HAVE_NEON
|
||||
|
||||
/* Set to 1 if runtime detection of NEON is enabled */
|
||||
/* TODO: handle properly in CMake */
|
||||
#cmakedefine WEBP_HAVE_NEON_RTCD
|
||||
|
||||
/* Set to 1 if PNG library is installed */
|
||||
#cmakedefine WEBP_HAVE_PNG 1
|
||||
|
||||
/* Set to 1 if SDL library is installed */
|
||||
#cmakedefine WEBP_HAVE_SDL 1
|
||||
|
||||
/* Set to 1 if SSE2 is supported */
|
||||
#cmakedefine WEBP_HAVE_SSE2 1
|
||||
|
||||
@ -127,6 +140,9 @@
|
||||
/* Set to 1 if TIFF library is installed */
|
||||
#cmakedefine WEBP_HAVE_TIFF 1
|
||||
|
||||
/* Enable near lossless encoding */
|
||||
#cmakedefine WEBP_NEAR_LOSSLESS 1
|
||||
|
||||
/* Undefine this to disable thread support. */
|
||||
#cmakedefine WEBP_USE_THREAD 1
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
## Check for SIMD extensions.
|
||||
include(CMakePushCheckState)
|
||||
|
||||
function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
if(NOT ENABLE_SIMD)
|
||||
@ -7,6 +8,8 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
return()
|
||||
endif()
|
||||
unset(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} CACHE)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
check_c_source_compiles("
|
||||
#include \"${CMAKE_CURRENT_LIST_DIR}/../src/dsp/dsp.h\"
|
||||
int main(void) {
|
||||
@ -17,6 +20,7 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
}
|
||||
" WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG}
|
||||
)
|
||||
cmake_pop_check_state()
|
||||
if(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG})
|
||||
set(WEBP_HAVE_${WEBP_SIMD_FLAG} 1 PARENT_SCOPE)
|
||||
else()
|
||||
@ -60,6 +64,7 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
# First try with no extra flag added as the compiler might have default flags
|
||||
# (especially on Android).
|
||||
unset(WEBP_HAVE_${WEBP_SIMD_FLAG} CACHE)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
|
||||
if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
|
||||
@ -85,11 +90,8 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
foreach(FILE ${SIMD_FILES})
|
||||
list(APPEND WEBP_SIMD_FILES_NOT_TO_INCLUDE ${FILE})
|
||||
endforeach()
|
||||
# Explicitly disable SIMD. Avoid this with WASM to avoid an ICE with clang:
|
||||
# https://bugs.chromium.org/p/webp/issues/detail?id=350
|
||||
# WASM overrides the native SIMD so building it in is harmless aside from
|
||||
# binary size.
|
||||
if(NOT WEBP_ENABLE_WASM AND SIMD_DISABLE_FLAGS)
|
||||
# Explicitly disable SIMD.
|
||||
if(SIMD_DISABLE_FLAGS)
|
||||
list(GET SIMD_DISABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
|
||||
include(CheckCCompilerFlag)
|
||||
if(SIMD_COMPILE_FLAG)
|
||||
@ -104,11 +106,12 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
set(COMMON_PATTERNS)
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_DEFINITIONS ${SIMD_COMPILE_FLAG})
|
||||
check_c_source_compiles("int main(void) {return 0;}" FLAG2
|
||||
check_c_source_compiles("int main(void) {return 0;}"
|
||||
FLAG_${SIMD_COMPILE_FLAG}
|
||||
FAIL_REGEX "warning: argument unused during compilation:"
|
||||
${COMMON_PATTERNS}
|
||||
)
|
||||
if(NOT FLAG2)
|
||||
if(NOT FLAG_${SIMD_COMPILE_FLAG})
|
||||
unset(HAS_COMPILE_FLAG CACHE)
|
||||
endif()
|
||||
endif()
|
||||
@ -118,14 +121,5 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
endforeach()
|
||||
|
||||
## Add *_wasm.c files if enabled.
|
||||
if(WEBP_ENABLE_WASM)
|
||||
file(GLOB SIMD_FILES "${CMAKE_CURRENT_LIST_DIR}/../"
|
||||
"src/dsp/*_wasm.c"
|
||||
)
|
||||
foreach(FILE ${SIMD_FILES})
|
||||
list(APPEND WEBP_SIMD_FILES_TO_INCLUDE ${FILE})
|
||||
endforeach()
|
||||
endif()
|
||||
|
@ -70,18 +70,43 @@ foreach(I_LIB PNG JPEG TIFF)
|
||||
set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
|
||||
if(${I_LIB}_FOUND)
|
||||
list(APPEND WEBP_DEP_IMG_LIBRARIES ${${I_LIB}_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS ${${I_LIB}_INCLUDE_DIRS})
|
||||
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS
|
||||
${${I_LIB}_INCLUDE_DIR} ${${I_LIB}_INCLUDE_DIRS})
|
||||
endif()
|
||||
endforeach()
|
||||
if(WEBP_DEP_IMG_INCLUDE_DIRS)
|
||||
list(REMOVE_DUPLICATES WEBP_DEP_IMG_INCLUDE_DIRS)
|
||||
endif()
|
||||
|
||||
# GIF detection, gifdec isn't part of the imageio lib.
|
||||
include(CMakePushCheckState)
|
||||
set(WEBP_DEP_GIF_LIBRARIES)
|
||||
set(WEBP_DEP_GIF_INCLUDE_DIRS)
|
||||
find_package(GIF)
|
||||
set(WEBP_HAVE_GIF ${GIF_FOUND})
|
||||
if(GIF_FOUND)
|
||||
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
|
||||
# GIF find_package only locates the header and library, it doesn't fail
|
||||
# compile tests when detecting the version, but falls back to 3 (as of at
|
||||
# least cmake 3.7.2). Make sure the library links to avoid incorrect
|
||||
# detection when cross compiling.
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${GIF_LIBRARIES})
|
||||
set(CMAKE_REQUIRED_INCLUDES ${GIF_INCLUDE_DIR})
|
||||
check_c_source_compiles("
|
||||
#include <gif_lib.h>
|
||||
int main(void) {
|
||||
(void)DGifOpenFileHandle;
|
||||
return 0;
|
||||
}
|
||||
" GIF_COMPILES
|
||||
)
|
||||
cmake_pop_check_state()
|
||||
if(GIF_COMPILES)
|
||||
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
|
||||
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
|
||||
else()
|
||||
unset(GIF_FOUND)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
## Check for specific headers.
|
||||
@ -139,13 +164,3 @@ strip_bracket(PACKAGE_URL)
|
||||
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
|
||||
set(PACKAGE_TARNAME ${PACKAGE_NAME})
|
||||
set(VERSION ${PACKAGE_VERSION})
|
||||
|
||||
## Generate the config.h header.
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/include/webp/config.h)
|
||||
add_definitions(-DHAVE_CONFIG_H)
|
||||
# The webp folder is included as we reference config.h as
|
||||
# ../webp/config.h or webp/config.h
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include
|
||||
${CMAKE_CURRENT_BINARY_DIR}/include/webp
|
||||
)
|
38
configure.ac
38
configure.ac
@ -1,4 +1,4 @@
|
||||
AC_INIT([libwebp], [0.6.0],
|
||||
AC_INIT([libwebp], [0.6.1],
|
||||
[https://bugs.chromium.org/p/webp],,
|
||||
[http://developers.google.com/speed/webp])
|
||||
AC_CANONICAL_HOST
|
||||
@ -79,6 +79,7 @@ TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wparentheses-equality])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshadow])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshorten-64-to-32])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wundef])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunreachable-code])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused-but-set-variable])
|
||||
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused])
|
||||
@ -444,12 +445,12 @@ AS_IF([test "x$enable_sdl" != "xno"], [
|
||||
CLEAR_LIBVARS([SDL])
|
||||
WITHLIB_OPTION([sdl], [SDL])
|
||||
|
||||
$sdl_header = "no";
|
||||
sdl_header="no"
|
||||
LIBCHECK_PROLOGUE([SDL])
|
||||
AC_CHECK_HEADER([SDL/SDL.h], [sdl_header="SDL_SDL.h"],
|
||||
[AC_CHECK_HEADER([SDL.h], [sdl_header="SDL.h"],
|
||||
[AC_MSG_WARN(SDL library not available - no sdl.h)])])
|
||||
if test x"$sdl_header" != "xno" ; then
|
||||
if test x"$sdl_header" != "xno"; then
|
||||
AC_CHECK_LIB(SDL, SDL_Init,
|
||||
[SDL_LIBS="-lSDL"
|
||||
SDL_INCLUDES="-DWEBP_HAVE_SDL"
|
||||
@ -458,14 +459,14 @@ AS_IF([test "x$enable_sdl" != "xno"], [
|
||||
sdl_support=yes
|
||||
],
|
||||
AC_MSG_WARN(Optional SDL library not found),
|
||||
[$MATH_LIBS]),
|
||||
if test x"$sdl_header" == "xSDL.h" ; then
|
||||
[$MATH_LIBS])
|
||||
if test x"$sdl_header" = "xSDL.h"; then
|
||||
SDL_INCLUDES="$SDL_INCLUDES -DWEBP_HAVE_JUST_SDL_H"
|
||||
fi
|
||||
fi
|
||||
LIBCHECK_EPILOGUE([SDL])
|
||||
|
||||
if test "$sdl_support" = "yes" ; then
|
||||
if test "$sdl_support" = "yes"; then
|
||||
build_vwebp_sdl=yes
|
||||
fi
|
||||
])
|
||||
@ -589,7 +590,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
|
||||
|
||||
if test "$gif_support" = "yes" -a \
|
||||
"$enable_libwebpdemux" = "yes"; then
|
||||
build_animdiff=yes
|
||||
build_anim_diff=yes
|
||||
fi
|
||||
|
||||
if test "$gif_support" = "yes" -a \
|
||||
@ -597,7 +598,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
|
||||
build_gif2webp=yes
|
||||
fi
|
||||
])
|
||||
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_animdiff}" = "yes"])
|
||||
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_anim_diff}" = "yes"])
|
||||
AM_CONDITIONAL([BUILD_GIF2WEBP], [test "${build_gif2webp}" = "yes"])
|
||||
|
||||
if test "$enable_libwebpmux" = "yes"; then
|
||||
@ -662,7 +663,7 @@ if test "$enable_wic" = "yes"; then
|
||||
fi
|
||||
esac
|
||||
|
||||
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
|
||||
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1
|
||||
|
||||
USE_SWAP_16BIT_CSP=""
|
||||
AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
|
||||
@ -670,7 +671,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
|
||||
AS_HELP_STRING([--enable-swap-16bit-csp],
|
||||
[Enable byte swap for 16 bit colorspaces]))
|
||||
if test "$enable_swap_16bit_csp" = "yes"; then
|
||||
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
|
||||
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
|
||||
fi
|
||||
AC_MSG_RESULT(${enable_swap_16bit_csp-no})
|
||||
AC_SUBST(USE_SWAP_16BIT_CSP)
|
||||
@ -688,6 +689,21 @@ fi
|
||||
AC_MSG_RESULT(${enable_experimental-no})
|
||||
AC_SUBST(USE_EXPERIMENTAL_CODE)
|
||||
|
||||
dnl === If --disable-near-lossless is defined, add -DWEBP_NEAR_LOSSLESS=0
|
||||
|
||||
AC_DEFINE(WEBP_NEAR_LOSSLESS, [1], [Enable near lossless encoding])
|
||||
AC_MSG_CHECKING(if --disable-near-lossless option is specified)
|
||||
AC_ARG_ENABLE([near_lossless],
|
||||
AS_HELP_STRING([--disable-near-lossless],
|
||||
[Disable near lossless encoding]),
|
||||
[], [enable_near_lossless=yes])
|
||||
if test "$enable_near_lossless" = "no"; then
|
||||
AC_DEFINE(WEBP_NEAR_LOSSLESS, [0], [Enable near lossless encoding])
|
||||
AC_MSG_RESULT([yes])
|
||||
else
|
||||
AC_MSG_RESULT([no])
|
||||
fi
|
||||
|
||||
dnl === Check whether libwebpmux should be built
|
||||
AC_MSG_CHECKING(whether libwebpmux is to be built)
|
||||
AC_ARG_ENABLE([libwebpmux],
|
||||
@ -762,7 +778,7 @@ dwebp : yes
|
||||
PNG : ${png_support-no}
|
||||
WIC : ${wic_support-no}
|
||||
GIF support : ${gif_support-no}
|
||||
anim_diff : ${build_animdiff-no}
|
||||
anim_diff : ${build_anim_diff-no}
|
||||
gif2webp : ${build_gif2webp-no}
|
||||
img2webp : ${build_img2webp-no}
|
||||
webpmux : ${enable_libwebpmux-no}
|
||||
|
@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
|
||||
bin_PROGRAMS = dwebp cwebp
|
||||
if BUILD_ANIMDIFF
|
||||
noinst_PROGRAMS = anim_diff
|
||||
noinst_PROGRAMS = anim_diff anim_dump
|
||||
endif
|
||||
if BUILD_GIF2WEBP
|
||||
bin_PROGRAMS += gif2webp
|
||||
@ -27,20 +27,36 @@ libexample_util_la_LIBADD = ../src/libwebp.la
|
||||
|
||||
anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
|
||||
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
|
||||
anim_diff_LDADD = ../src/demux/libwebpdemux.la
|
||||
anim_diff_LDADD += libexample_util.la ../imageio/libimageio_util.la
|
||||
anim_diff_LDADD =
|
||||
anim_diff_LDADD += ../src/demux/libwebpdemux.la
|
||||
anim_diff_LDADD += libexample_util.la
|
||||
anim_diff_LDADD += ../imageio/libimageio_util.la
|
||||
anim_diff_LDADD += $(GIF_LIBS) -lm
|
||||
|
||||
anim_dump_SOURCES = anim_dump.c anim_util.c anim_util.h
|
||||
anim_dump_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(PNG_INCLUDES)
|
||||
anim_dump_CPPFLAGS += $(GIF_INCLUDES)
|
||||
anim_dump_LDADD =
|
||||
anim_dump_LDADD += ../src/demux/libwebpdemux.la
|
||||
anim_dump_LDADD += libexample_util.la
|
||||
anim_dump_LDADD += ../imageio/libimageio_util.la
|
||||
anim_dump_LDADD += ../imageio/libimageenc.la
|
||||
anim_dump_LDADD += $(PNG_LIBS) $(GIF_LIBS) $(TIFF_LIBS) -lm
|
||||
|
||||
cwebp_SOURCES = cwebp.c stopwatch.h
|
||||
cwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
cwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
cwebp_LDADD += ../imageio/libimagedec.la ../src/libwebp.la
|
||||
cwebp_LDADD =
|
||||
cwebp_LDADD += libexample_util.la
|
||||
cwebp_LDADD += ../imageio/libimageio_util.la
|
||||
cwebp_LDADD += ../imageio/libimagedec.la
|
||||
cwebp_LDADD += ../src/libwebp.la
|
||||
cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
dwebp_SOURCES = dwebp.c stopwatch.h
|
||||
dwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
|
||||
dwebp_LDADD = libexample_util.la
|
||||
dwebp_LDADD =
|
||||
dwebp_LDADD += libexample_util.la
|
||||
dwebp_LDADD += ../imageio/libimagedec.la
|
||||
dwebp_LDADD += ../imageio/libimageenc.la
|
||||
dwebp_LDADD += ../imageio/libimageio_util.la
|
||||
@ -49,35 +65,52 @@ dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)
|
||||
|
||||
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
|
||||
gif2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
gif2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la $(GIF_LIBS)
|
||||
gif2webp_LDADD =
|
||||
gif2webp_LDADD += libexample_util.la
|
||||
gif2webp_LDADD += ../imageio/libimageio_util.la
|
||||
gif2webp_LDADD += ../src/mux/libwebpmux.la
|
||||
gif2webp_LDADD += ../src/libwebp.la
|
||||
gif2webp_LDADD += $(GIF_LIBS)
|
||||
|
||||
vwebp_SOURCES = vwebp.c
|
||||
vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
|
||||
vwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
vwebp_LDADD += ../src/demux/libwebpdemux.la $(GL_LIBS)
|
||||
vwebp_LDADD =
|
||||
vwebp_LDADD += libexample_util.la
|
||||
vwebp_LDADD += ../imageio/libimageio_util.la
|
||||
vwebp_LDADD += ../src/demux/libwebpdemux.la
|
||||
vwebp_LDADD += $(GL_LIBS)
|
||||
|
||||
webpmux_SOURCES = webpmux.c
|
||||
webpmux_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webpmux_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
webpmux_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
|
||||
webpmux_LDADD =
|
||||
webpmux_LDADD += libexample_util.la
|
||||
webpmux_LDADD += ../imageio/libimageio_util.la
|
||||
webpmux_LDADD += ../src/mux/libwebpmux.la
|
||||
webpmux_LDADD += ../src/libwebp.la
|
||||
|
||||
img2webp_SOURCES = img2webp.c
|
||||
img2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
img2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
img2webp_LDADD =
|
||||
img2webp_LDADD += libexample_util.la
|
||||
img2webp_LDADD += ../imageio/libimageio_util.la
|
||||
img2webp_LDADD += ../imageio/libimagedec.la
|
||||
img2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
|
||||
img2webp_LDADD += ../src/mux/libwebpmux.la
|
||||
img2webp_LDADD += ../src/libwebp.la
|
||||
img2webp_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
webpinfo_SOURCES = webpinfo.c
|
||||
webpinfo_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webpinfo_LDADD = libexample_util.la ../imageio/libimageio_util.la
|
||||
webpinfo_LDADD =
|
||||
webpinfo_LDADD += libexample_util.la
|
||||
webpinfo_LDADD += ../imageio/libimageio_util.la
|
||||
webpinfo_LDADD += ../src/libwebp.la
|
||||
|
||||
if BUILD_LIBWEBPDECODER
|
||||
anim_diff_LDADD += ../src/libwebpdecoder.la
|
||||
anim_dump_LDADD += ../src/libwebpdecoder.la
|
||||
vwebp_LDADD += ../src/libwebpdecoder.la
|
||||
else
|
||||
anim_diff_LDADD += ../src/libwebp.la
|
||||
anim_dump_LDADD += ../src/libwebp.la
|
||||
vwebp_LDADD += ../src/libwebp.la
|
||||
endif
|
||||
|
104
examples/anim_dump.c
Normal file
104
examples/anim_dump.c
Normal file
@ -0,0 +1,104 @@
|
||||
// Copyright 2017 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Decodes an animated WebP file and dumps the decoded frames as PNG, TIFF or
// PAM.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h> // for 'strcmp'.
|
||||
|
||||
#include "./anim_util.h"
|
||||
#include "webp/decode.h"
|
||||
#include "../imageio/image_enc.h"
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
// Prints the command-line usage summary for anim_dump on stdout.
static void Help(void) {
  // The text contains no conversion specifiers, so it is emitted verbatim as
  // one concatenated literal.
  static const char kUsage[] =
      "Usage: anim_dump [options] files...\n"
      "\nOptions:\n"
      " -folder <string> .... dump folder (default: '.')\n"
      " -prefix <string> .... prefix for dumped frames "
      "(default: 'dump_')\n"
      " -tiff ............... save frames as TIFF\n"
      " -pam ................ save frames as PAM\n";
  fputs(kUsage, stdout);
}
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
int error = 0;
|
||||
const char* dump_folder = ".";
|
||||
const char* prefix = "dump_";
|
||||
const char* suffix = "png";
|
||||
WebPOutputFileFormat format = PNG;
|
||||
int c;
|
||||
|
||||
if (argc < 2) {
|
||||
Help();
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (c = 1; !error && c < argc; ++c) {
|
||||
if (!strcmp(argv[c], "-folder")) {
|
||||
if (c + 1 == argc) {
|
||||
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
dump_folder = argv[++c];
|
||||
} else if (!strcmp(argv[c], "-prefix")) {
|
||||
if (c + 1 == argc) {
|
||||
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
prefix = argv[++c];
|
||||
} else if (!strcmp(argv[c], "-tiff")) {
|
||||
format = TIFF;
|
||||
suffix = "tiff";
|
||||
} else if (!strcmp(argv[c], "-pam")) {
|
||||
format = PAM;
|
||||
suffix = "pam";
|
||||
} else {
|
||||
uint32_t i;
|
||||
AnimatedImage image;
|
||||
const char* const file = argv[c];
|
||||
memset(&image, 0, sizeof(image));
|
||||
printf("Decoding file: %s as %s/%sxxxx.%s\n",
|
||||
file, dump_folder, prefix, suffix);
|
||||
if (!ReadAnimatedImage(file, &image, 0, NULL)) {
|
||||
fprintf(stderr, "Error decoding file: %s\n Aborting.\n", file);
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
for (i = 0; !error && i < image.num_frames; ++i) {
|
||||
char out_file[1024];
|
||||
WebPDecBuffer buffer;
|
||||
WebPInitDecBuffer(&buffer);
|
||||
buffer.colorspace = MODE_RGBA;
|
||||
buffer.is_external_memory = 1;
|
||||
buffer.width = image.canvas_width;
|
||||
buffer.height = image.canvas_height;
|
||||
buffer.u.RGBA.rgba = image.frames[i].rgba;
|
||||
buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
|
||||
buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
|
||||
snprintf(out_file, sizeof(out_file), "%s/%s%.4d.%s",
|
||||
dump_folder, prefix, i, suffix);
|
||||
if (!WebPSaveImage(&buffer, format, out_file)) {
|
||||
fprintf(stderr, "Error while saving image '%s'\n", out_file);
|
||||
error = 1;
|
||||
}
|
||||
WebPFreeDecBuffer(&buffer);
|
||||
}
|
||||
ClearAnimatedImage(&image);
|
||||
}
|
||||
}
|
||||
return error ? 1 : 0;
|
||||
}
|
@ -16,7 +16,7 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef WEBP_HAVE_GIF
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
#include <gif_lib.h>
|
||||
#endif
|
||||
#include "webp/format_constants.h"
|
||||
@ -33,11 +33,13 @@ static const int kNumChannels = 4;
|
||||
// -----------------------------------------------------------------------------
|
||||
// Common utilities.
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
// Returns true if the frame covers the full canvas.
|
||||
static int IsFullFrame(int width, int height,
|
||||
int canvas_width, int canvas_height) {
|
||||
return (width == canvas_width && height == canvas_height);
|
||||
}
|
||||
#endif // WEBP_HAVE_GIF
|
||||
|
||||
static int CheckSizeForOverflow(uint64_t size) {
|
||||
return (size == (size_t)size);
|
||||
@ -85,6 +87,7 @@ void ClearAnimatedImage(AnimatedImage* const image) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
// Clear the canvas to transparent.
|
||||
static void ZeroFillCanvas(uint8_t* rgba,
|
||||
uint32_t canvas_width, uint32_t canvas_height) {
|
||||
@ -126,6 +129,7 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
#endif // WEBP_HAVE_GIF
|
||||
|
||||
// Canonicalize all transparent pixels to transparent black to aid comparison.
|
||||
static void CleanupTransparentPixels(uint32_t* rgba,
|
||||
@ -152,6 +156,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
|
||||
FILE* f = NULL;
|
||||
const char* row;
|
||||
|
||||
if (dump_folder == NULL) dump_folder = ".";
|
||||
|
||||
base_name = strrchr(filename, '/');
|
||||
base_name = (base_name == NULL) ? filename : base_name + 1;
|
||||
max_len = strlen(dump_folder) + 1 + strlen(base_name)
|
||||
@ -200,7 +206,7 @@ static int IsWebP(const WebPData* const webp_data) {
|
||||
return (WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL) != 0);
|
||||
}
|
||||
|
||||
// Read animated WebP bitstream 'file_str' into 'AnimatedImage' struct.
|
||||
// Read animated WebP bitstream 'webp_data' into 'AnimatedImage' struct.
|
||||
static int ReadAnimatedWebP(const char filename[],
|
||||
const WebPData* const webp_data,
|
||||
AnimatedImage* const image, int dump_frames,
|
||||
@ -278,7 +284,7 @@ static int ReadAnimatedWebP(const char filename[],
|
||||
// -----------------------------------------------------------------------------
|
||||
// GIF Decoding.
|
||||
|
||||
#ifdef WEBP_HAVE_GIF
|
||||
#if defined(WEBP_HAVE_GIF)
|
||||
|
||||
// Returns true if this is a valid GIF bitstream.
|
||||
static int IsGIF(const WebPData* const data) {
|
||||
@ -423,6 +429,11 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
|
||||
}
|
||||
|
||||
// Find appropriate app extension and get loop count from the next extension.
|
||||
// We use Chrome's interpretation of the 'loop_count' semantics:
|
||||
// if not present -> loop once
|
||||
// if present and loop_count == 0, return 0 ('infinite').
|
||||
// if present and loop_count != 0, it's the number of *extra* loops
|
||||
// so we need to return loop_count + 1 as total loop number.
|
||||
static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
|
||||
int i;
|
||||
for (i = 0; i < gif->ImageCount; ++i) {
|
||||
@ -440,12 +451,13 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
|
||||
if (signature_is_ok &&
|
||||
eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
|
||||
eb2->Bytes[0] == 1) {
|
||||
return ((uint32_t)(eb2->Bytes[2]) << 8) +
|
||||
((uint32_t)(eb2->Bytes[1]) << 0);
|
||||
const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
|
||||
((uint32_t)(eb2->Bytes[1]) << 0);
|
||||
return (extra_loop > 0) ? extra_loop + 1 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0; // Default.
|
||||
return 1; // Default.
|
||||
}
|
||||
|
||||
// Get duration of 'n'th frame in milliseconds.
|
||||
|
@ -463,8 +463,9 @@ static int WriteWebPWithMetadata(FILE* const out,
|
||||
} else {
|
||||
const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
|
||||
if (is_lossless) {
|
||||
// Presence of alpha is stored in the 29th bit of VP8L data.
|
||||
if (webp[kChunkHeaderSize + 3] & (1 << 5)) flags |= kAlphaFlag;
|
||||
// Presence of alpha is stored in the 37th bit (29th after the
|
||||
// signature) of VP8L data.
|
||||
if (webp[kChunkHeaderSize + 4] & (1 << 4)) flags |= kAlphaFlag;
|
||||
}
|
||||
ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
|
||||
ok = ok && WriteLE32(out, flags);
|
||||
@ -486,10 +487,10 @@ static int WriteWebPWithMetadata(FILE* const out,
|
||||
*metadata_written |= METADATA_XMP;
|
||||
}
|
||||
return ok;
|
||||
} else {
|
||||
// No metadata, just write the original image file.
|
||||
return (fwrite(webp, webp_size, 1, out) == 1);
|
||||
}
|
||||
|
||||
// No metadata, just write the original image file.
|
||||
return (fwrite(webp, webp_size, 1, out) == 1);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -72,8 +72,10 @@ static void Help(void) {
|
||||
printf(" -metadata <string> ..... comma separated list of metadata to\n");
|
||||
printf(" ");
|
||||
printf("copy from the input to the output if present\n");
|
||||
printf(" "
|
||||
"Valid values: all, none, icc, xmp (default)\n");
|
||||
printf(" ");
|
||||
printf("Valid values: all, none, icc, xmp (default)\n");
|
||||
printf(" -loop_compatibility .... use compatibility mode for Chrome\n");
|
||||
printf(" version prior to M62 (inclusive)\n");
|
||||
printf(" -mt .................... use multi-threading if available\n");
|
||||
printf("\n");
|
||||
printf(" -version ............... print version number and exit\n");
|
||||
@ -104,7 +106,7 @@ int main(int argc, const char *argv[]) {
|
||||
WebPAnimEncoderOptions enc_options;
|
||||
WebPConfig config;
|
||||
|
||||
int is_first_frame = 1; // Whether we are processing the first frame.
|
||||
int frame_number = 0; // Number of frames added so far (0 while on the first frame).
|
||||
int done;
|
||||
int c;
|
||||
int quiet = 0;
|
||||
@ -115,8 +117,9 @@ int main(int argc, const char *argv[]) {
|
||||
int stored_icc = 0; // Whether we have already stored an ICC profile.
|
||||
WebPData xmp_data;
|
||||
int stored_xmp = 0; // Whether we have already stored an XMP profile.
|
||||
int loop_count = 0;
|
||||
int loop_count = 0; // default: infinite
|
||||
int stored_loop_count = 0; // Whether we have found an explicit loop count.
|
||||
int loop_compatibility = 0;
|
||||
WebPMux* mux = NULL;
|
||||
|
||||
int default_kmin = 1; // Whether to use default kmin value.
|
||||
@ -151,6 +154,8 @@ int main(int argc, const char *argv[]) {
|
||||
} else if (!strcmp(argv[c], "-mixed")) {
|
||||
enc_options.allow_mixed = 1;
|
||||
config.lossless = 0;
|
||||
} else if (!strcmp(argv[c], "-loop_compatibility")) {
|
||||
loop_compatibility = 1;
|
||||
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
|
||||
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
|
||||
} else if (!strcmp(argv[c], "-m") && c < argc - 1) {
|
||||
@ -277,7 +282,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
if (!DGifGetImageDesc(gif)) goto End;
|
||||
|
||||
if (is_first_frame) {
|
||||
if (frame_number == 0) {
|
||||
if (verbose) {
|
||||
printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
|
||||
}
|
||||
@ -319,7 +324,6 @@ int main(int argc, const char *argv[]) {
|
||||
"a memory error.\n");
|
||||
goto End;
|
||||
}
|
||||
is_first_frame = 0;
|
||||
}
|
||||
|
||||
// Some even more broken GIF can have sub-rect with zero width/height.
|
||||
@ -336,7 +340,11 @@ int main(int argc, const char *argv[]) {
|
||||
GIFBlendFrames(&frame, &gif_rect, &curr_canvas);
|
||||
|
||||
if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
|
||||
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
|
||||
fprintf(stderr, "Error while adding frame #%d: %s\n", frame_number,
|
||||
WebPAnimEncoderGetError(enc));
|
||||
goto End;
|
||||
} else {
|
||||
++frame_number;
|
||||
}
|
||||
|
||||
// Update canvases.
|
||||
@ -386,7 +394,7 @@ int main(int argc, const char *argv[]) {
|
||||
if (verbose) {
|
||||
fprintf(stderr, "Loop count: %d\n", loop_count);
|
||||
}
|
||||
stored_loop_count = (loop_count != 0);
|
||||
stored_loop_count = loop_compatibility ? (loop_count != 0) : 1;
|
||||
} else { // An extension containing metadata.
|
||||
// We only store the first encountered chunk of each type, and
|
||||
// only if requested by the user.
|
||||
@ -443,6 +451,23 @@ int main(int argc, const char *argv[]) {
|
||||
goto End;
|
||||
}
|
||||
|
||||
if (!loop_compatibility) {
|
||||
if (!stored_loop_count) {
|
||||
// if no loop-count element is seen, the default is '1' (loop-once)
|
||||
// and we need to signal it explicitly in WebP. Note however that
|
||||
// in case there's a single frame, we still don't need to store it.
|
||||
if (frame_number > 1) {
|
||||
stored_loop_count = 1;
|
||||
loop_count = 1;
|
||||
}
|
||||
} else if (loop_count > 0) {
|
||||
// adapt GIF's semantics to WebP's (except in the infinite-loop case)
|
||||
loop_count += 1;
|
||||
}
|
||||
}
|
||||
// loop_count of 0 is the default (infinite), so no need to signal it
|
||||
if (loop_count == 0) stored_loop_count = 0;
|
||||
|
||||
if (stored_loop_count || stored_icc || stored_xmp) {
|
||||
// Re-mux to add loop count and/or metadata as needed.
|
||||
mux = WebPMuxCreate(&webp_data, 1);
|
||||
|
@ -248,9 +248,9 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
|
||||
}
|
||||
}
|
||||
} else if (key == 'i') {
|
||||
// Note: doesn't handle refresh of animation's last-frame (it's quite
|
||||
// more involved to do, since you need to save the previous frame).
|
||||
kParams.print_info = 1 - kParams.print_info;
|
||||
// TODO(skal): handle refresh of animation's last-frame too. It's quite
|
||||
// more involved though (need to save the previous frame).
|
||||
if (!kParams.has_animation) ClearPreviousFrame();
|
||||
glutPostRedisplay();
|
||||
} else if (key == 'd') {
|
||||
@ -260,8 +260,8 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
|
||||
}
|
||||
|
||||
static void HandleReshape(int width, int height) {
|
||||
// TODO(skal): should we preserve aspect ratio?
|
||||
// Also: handle larger-than-screen pictures correctly.
|
||||
// Note: reshape doesn't preserve aspect ratio, and might
|
||||
// be handling larger-than-screen pictures incorrectly.
|
||||
glViewport(0, 0, width, height);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
@ -378,13 +378,23 @@ static void HandleDisplay(void) {
|
||||
}
|
||||
}
|
||||
glPopMatrix();
|
||||
#if defined(__APPLE__) || defined(_WIN32)
|
||||
glFlush();
|
||||
#else
|
||||
glutSwapBuffers();
|
||||
#endif
|
||||
}
|
||||
|
||||
static void StartDisplay(void) {
|
||||
const int width = kParams.canvas_width;
|
||||
const int height = kParams.canvas_height;
|
||||
// TODO(webp:365) GLUT_DOUBLE results in flickering / old frames to be
|
||||
// partially displayed with animated webp + alpha.
|
||||
#if defined(__APPLE__) || defined(_WIN32)
|
||||
glutInitDisplayMode(GLUT_RGBA);
|
||||
#else
|
||||
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
|
||||
#endif
|
||||
glutInitWindowSize(width, height);
|
||||
glutCreateWindow("WebP viewer");
|
||||
glutDisplayFunc(HandleDisplay);
|
||||
|
@ -233,20 +233,20 @@ static int GetSignedBits(const uint8_t* const data, size_t data_size, size_t nb,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define GET_SIGNED_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetSignedBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define GET_SIGNED_BITS(v, n) \
|
||||
do { \
|
||||
if (!GetSignedBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossy bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
|
||||
@ -462,12 +462,12 @@ static int LLGetBits(const uint8_t* const data, size_t data_size, size_t nb,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define LL_GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!LLGetBits(data, data_size, n, &v, bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossless bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
#define LL_GET_BITS(v, n) \
|
||||
do { \
|
||||
if (!LLGetBits(data, data_size, n, &(v), bit_pos)) { \
|
||||
LOG_ERROR("Truncated lossless bitstream."); \
|
||||
return WEBP_INFO_TRUNCATED_DATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
|
||||
@ -817,9 +817,8 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
|
||||
if (webp_info->seen_image_subchunk_) {
|
||||
LOG_ERROR("Consecutive VP8/VP8L sub-chunks in an ANMF chunk.");
|
||||
return WEBP_INFO_PARSE_ERROR;
|
||||
} else {
|
||||
webp_info->seen_image_subchunk_ = 1;
|
||||
}
|
||||
webp_info->seen_image_subchunk_ = 1;
|
||||
} else {
|
||||
if (webp_info->chunk_counts_[CHUNK_VP8] ||
|
||||
webp_info->chunk_counts_[CHUNK_VP8L]) {
|
||||
@ -873,9 +872,9 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
|
||||
if (webp_info->seen_alpha_subchunk_) {
|
||||
LOG_ERROR("Consecutive ALPH sub-chunks in an ANMF chunk.");
|
||||
return WEBP_INFO_PARSE_ERROR;
|
||||
} else {
|
||||
webp_info->seen_alpha_subchunk_ = 1;
|
||||
}
|
||||
webp_info->seen_alpha_subchunk_ = 1;
|
||||
|
||||
if (webp_info->seen_image_subchunk_) {
|
||||
LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
|
||||
"in an ANMF chunk.");
|
||||
@ -1107,6 +1106,7 @@ static void HelpLong(void) {
|
||||
"Note: there could be multiple input files;\n"
|
||||
" options must come before input files.\n"
|
||||
"Options:\n"
|
||||
" -version ........... Print version number and exit.\n"
|
||||
" -quiet ............. Do not show chunk parsing information.\n"
|
||||
" -diag .............. Show parsing error diagnosis.\n"
|
||||
" -summary ........... Show chunk stats summary.\n"
|
||||
@ -1140,6 +1140,11 @@ int main(int argc, const char* argv[]) {
|
||||
show_summary = 1;
|
||||
} else if (!strcmp(argv[c], "-bitstream_info")) {
|
||||
parse_bitstream = 1;
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
const int version = WebPGetDecoderVersion();
|
||||
printf("WebP Decoder version: %d.%d.%d\n",
|
||||
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
|
||||
return 0;
|
||||
} else { // Assume the remaining are all input files.
|
||||
break;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
noinst_LTLIBRARIES = libwebpextras.la
|
||||
|
||||
@ -19,18 +20,22 @@ endif
|
||||
|
||||
get_disto_SOURCES = get_disto.c
|
||||
get_disto_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
get_disto_LDADD = ../imageio/libimageio_util.la ../imageio/libimagedec.la
|
||||
get_disto_LDADD =
|
||||
get_disto_LDADD += ../imageio/libimageio_util.la
|
||||
get_disto_LDADD += ../imageio/libimagedec.la
|
||||
get_disto_LDADD += ../src/libwebp.la
|
||||
get_disto_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
|
||||
|
||||
webp_quality_SOURCES = webp_quality.c
|
||||
webp_quality_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
webp_quality_LDADD = ../imageio/libimageio_util.la
|
||||
webp_quality_LDADD =
|
||||
webp_quality_LDADD += ../imageio/libimageio_util.la
|
||||
webp_quality_LDADD += libwebpextras.la
|
||||
webp_quality_LDADD += ../src/libwebp.la
|
||||
|
||||
vwebp_sdl_SOURCES = vwebp_sdl.c webp_to_sdl.c webp_to_sdl.h
|
||||
vwebp_sdl_CPPFLAGS = $(AM_CPPFLAGS) $(SDL_INCLUDES)
|
||||
vwebp_sdl_LDADD = ../imageio/libimageio_util.la
|
||||
vwebp_sdl_LDADD =
|
||||
vwebp_sdl_LDADD += ../imageio/libimageio_util.la
|
||||
vwebp_sdl_LDADD += ../src/libwebp.la
|
||||
vwebp_sdl_LDADD += $(SDL_LIBS)
|
||||
|
@ -10,7 +10,7 @@
|
||||
// Additional WebP utilities.
|
||||
//
|
||||
|
||||
#include "./extras.h"
|
||||
#include "extras/extras.h"
|
||||
#include "webp/format_constants.h"
|
||||
|
||||
#include <assert.h>
|
||||
@ -18,7 +18,7 @@
|
||||
|
||||
#define XTRA_MAJ_VERSION 0
|
||||
#define XTRA_MIN_VERSION 1
|
||||
#define XTRA_REV_VERSION 0
|
||||
#define XTRA_REV_VERSION 1
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -25,28 +25,28 @@ extern "C" {
|
||||
|
||||
// Returns the version number of the extras library, packed in hexadecimal using
|
||||
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
|
||||
WEBP_EXTERN(int) WebPGetExtrasVersion(void);
|
||||
WEBP_EXTERN int WebPGetExtrasVersion(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Ad-hoc colorspace importers.
|
||||
|
||||
// Import luma sample (gray scale image) into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);
|
||||
WEBP_EXTERN int WebPImportGray(const uint8_t* gray, WebPPicture* picture);
|
||||
|
||||
// Import rgb sample in RGB565 packed format into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
|
||||
WEBP_EXTERN int WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
|
||||
|
||||
// Import rgb sample in RGB4444 packed format into 'picture'. The 'picture'
|
||||
// width and height must be set prior to calling this function.
|
||||
WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
|
||||
WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
|
||||
|
||||
// Import a color mapped image. The number of colors is less than or equal to
|
||||
// MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
|
||||
// will be discarded. Returns 'false' in case of error, or if indexed[] contains
|
||||
// invalid indices.
|
||||
WEBP_EXTERN(int)
|
||||
WEBP_EXTERN int
|
||||
WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
|
||||
const uint32_t palette[], int palette_size,
|
||||
WebPPicture* pic);
|
||||
@ -59,7 +59,7 @@ WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
|
||||
// Otherwise (lossy bitstream), the returned value is in the range [0..100].
|
||||
// Any error (invalid bitstream, animated WebP, incomplete header, etc.)
|
||||
// will return a value of -1.
|
||||
WEBP_EXTERN(int) VP8EstimateQuality(const uint8_t* const data, size_t size);
|
||||
WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -24,8 +24,8 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "webp/encode.h"
|
||||
#include "../imageio/image_dec.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
#include "imageio/image_dec.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
|
||||
static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
|
||||
int keep_alpha) {
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./extras.h"
|
||||
#include "extras/extras.h"
|
||||
#include "webp/decode.h"
|
||||
|
||||
#include <math.h>
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include "webp_to_sdl.h"
|
||||
#include "webp/decode.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
|
||||
#if defined(WEBP_HAVE_JUST_SDL_H)
|
||||
#include <SDL.h>
|
||||
|
@ -11,8 +11,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "./extras.h"
|
||||
#include "../imageio/imageio_util.h"
|
||||
#include "extras/extras.h"
|
||||
#include "imageio/imageio_util.h"
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
int c;
|
||||
|
7
extras/webp_to_sdl.c
Executable file → Normal file
7
extras/webp_to_sdl.c
Executable file → Normal file
@ -28,6 +28,7 @@
|
||||
#include <SDL/SDL.h>
|
||||
#endif
|
||||
|
||||
static int init_ok = 0;
|
||||
int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
int ok = 0;
|
||||
VP8StatusCode status;
|
||||
@ -42,7 +43,10 @@ int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
SDL_Init(SDL_INIT_VIDEO);
|
||||
if (!init_ok) {
|
||||
SDL_Init(SDL_INIT_VIDEO);
|
||||
init_ok = 1;
|
||||
}
|
||||
|
||||
status = WebPGetFeatures((uint8_t*)data, (size_t)data_size, &config.input);
|
||||
if (status != VP8_STATUS_OK) goto Error;
|
||||
@ -97,6 +101,7 @@ int WebpToSDL(const char* data, unsigned int data_size) {
|
||||
Error:
|
||||
SDL_FreeSurface(surface);
|
||||
SDL_FreeSurface(screen);
|
||||
WebPFreeDecBuffer(output);
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
@ -1,13 +1,18 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
noinst_LTLIBRARIES = libimageio_util.la libimagedec.la libimageenc.la
|
||||
noinst_LTLIBRARIES =
|
||||
noinst_LTLIBRARIES += libimageio_util.la
|
||||
noinst_LTLIBRARIES += libimagedec.la
|
||||
noinst_LTLIBRARIES += libimageenc.la
|
||||
|
||||
noinst_HEADERS =
|
||||
noinst_HEADERS += ../src/webp/decode.h
|
||||
noinst_HEADERS += ../src/webp/types.h
|
||||
|
||||
libimageio_util_la_SOURCES = imageio_util.c imageio_util.h
|
||||
libimageio_util_la_SOURCES =
|
||||
libimageio_util_la_SOURCES += imageio_util.c imageio_util.h
|
||||
|
||||
libimagedec_la_SOURCES = image_dec.c image_dec.h
|
||||
libimagedec_la_SOURCES =
|
||||
libimagedec_la_SOURCES += image_dec.c image_dec.h
|
||||
libimagedec_la_SOURCES += jpegdec.c jpegdec.h
|
||||
libimagedec_la_SOURCES += metadata.c metadata.h
|
||||
libimagedec_la_SOURCES += pngdec.c pngdec.h
|
||||
@ -18,6 +23,7 @@ libimagedec_la_SOURCES += wicdec.c wicdec.h
|
||||
libimagedec_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
|
||||
libimagedec_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
|
||||
libimageenc_la_SOURCES = image_enc.c image_enc.h
|
||||
libimageenc_la_SOURCES =
|
||||
libimageenc_la_SOURCES += image_enc.c image_enc.h
|
||||
libimageenc_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
|
||||
libimageenc_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
|
||||
|
@ -542,22 +542,24 @@ int WebPWriteYUV(FILE* fout, const WebPDecBuffer* const buffer) {
|
||||
// Generic top-level call
|
||||
|
||||
int WebPSaveImage(const WebPDecBuffer* const buffer,
|
||||
WebPOutputFileFormat format, const char* const out_file) {
|
||||
WebPOutputFileFormat format,
|
||||
const char* const out_file_name) {
|
||||
FILE* fout = NULL;
|
||||
int needs_open_file = 1;
|
||||
const int use_stdout = (out_file != NULL) && !strcmp(out_file, "-");
|
||||
const int use_stdout = (out_file_name != NULL) && !strcmp(out_file_name, "-");
|
||||
int ok = 1;
|
||||
|
||||
if (buffer == NULL || out_file == NULL) return 0;
|
||||
if (buffer == NULL || out_file_name == NULL) return 0;
|
||||
|
||||
#ifdef HAVE_WINCODEC_H
|
||||
needs_open_file = (format != PNG);
|
||||
#endif
|
||||
|
||||
if (needs_open_file) {
|
||||
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
|
||||
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout)
|
||||
: fopen(out_file_name, "wb");
|
||||
if (fout == NULL) {
|
||||
fprintf(stderr, "Error opening output file %s\n", out_file);
|
||||
fprintf(stderr, "Error opening output file %s\n", out_file_name);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -566,7 +568,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
|
||||
format == RGBA || format == BGRA || format == ARGB ||
|
||||
format == rgbA || format == bgrA || format == Argb) {
|
||||
#ifdef HAVE_WINCODEC_H
|
||||
ok &= WebPWritePNG(out_file, use_stdout, buffer);
|
||||
ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
|
||||
#else
|
||||
ok &= WebPWritePNG(fout, buffer);
|
||||
#endif
|
||||
|
@ -137,7 +137,11 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
|
||||
|
||||
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
|
||||
const uint64_t total_size = nmemb * size;
|
||||
return (total_size == (size_t)total_size);
|
||||
int ok = (total_size == (size_t)total_size);
|
||||
#if defined(WEBP_MAX_IMAGE_SIZE)
|
||||
ok = ok && (total_size <= (uint64_t)WEBP_MAX_IMAGE_SIZE);
|
||||
#endif
|
||||
return ok;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
@ -304,18 +304,18 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
|
||||
|
||||
if (stride != (int)stride ||
|
||||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
|
||||
goto End;
|
||||
goto Error;
|
||||
}
|
||||
|
||||
rgb = (uint8_t*)malloc((size_t)stride * height);
|
||||
if (rgb == NULL) {
|
||||
goto End;
|
||||
goto Error;
|
||||
}
|
||||
buffer[0] = (JSAMPLE*)rgb;
|
||||
|
||||
while (dinfo.output_scanline < dinfo.output_height) {
|
||||
if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
|
||||
goto End;
|
||||
goto Error;
|
||||
}
|
||||
buffer[0] += stride;
|
||||
}
|
||||
|
@ -117,8 +117,13 @@ static size_t ReadPAMFields(PNMInfo* const info, size_t off) {
|
||||
}
|
||||
}
|
||||
if (!(info->seen_flags & TUPLE_FLAG)) {
|
||||
info->seen_flags |= TUPLE_FLAG;
|
||||
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
|
||||
if (info->depth > 0 && info->depth <= 4) {
|
||||
info->seen_flags |= TUPLE_FLAG;
|
||||
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
|
||||
} else {
|
||||
fprintf(stderr, "PAM: invalid bitdepth (%d).\n", info->depth);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (info->seen_flags != ALL_NEEDED_FLAGS) {
|
||||
fprintf(stderr, "PAM: incomplete header.\n");
|
||||
|
@ -141,10 +141,21 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
|
||||
|
||||
do {
|
||||
const int has_alpha = keep_alpha && bitstream->has_alpha;
|
||||
uint64_t stride;
|
||||
pic->width = bitstream->width;
|
||||
pic->height = bitstream->height;
|
||||
if (!pic->use_argb) pic->colorspace = has_alpha ? WEBP_YUV420A
|
||||
: WEBP_YUV420;
|
||||
if (pic->use_argb) {
|
||||
stride = (uint64_t)bitstream->width * 4;
|
||||
} else {
|
||||
stride = (uint64_t)bitstream->width * (has_alpha ? 5 : 3) / 2;
|
||||
pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
|
||||
}
|
||||
|
||||
if (!ImgIoUtilCheckSizeArgumentsOverflow(stride, bitstream->height)) {
|
||||
status = VP8_STATUS_OUT_OF_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
ok = WebPPictureAlloc(pic);
|
||||
if (!ok) {
|
||||
status = VP8_STATUS_OUT_OF_MEMORY;
|
||||
|
@ -34,6 +34,16 @@ else
|
||||
GL_LIBS = -lglut -lGL
|
||||
endif
|
||||
|
||||
# SDL flags: use sdl-config if it exists
|
||||
SDL_CONFIG = $(shell sdl-config --version 2> /dev/null)
|
||||
ifneq ($(SDL_CONFIG),)
|
||||
SDL_LIBS = $(shell sdl-config --libs)
|
||||
SDL_FLAGS = $(shell sdl-config --cflags)
|
||||
else
|
||||
# use best-guess
|
||||
SDL_LIBS = -lSDL
|
||||
SDL_FLAGS =
|
||||
endif
|
||||
|
||||
# To install libraries on Mac OS X:
|
||||
# 1. Install MacPorts (http://www.macports.org/install.php)
|
||||
@ -57,7 +67,7 @@ endif
|
||||
# EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES
|
||||
|
||||
# Extra flags to enable byte swap for 16 bit colorspaces.
|
||||
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP
|
||||
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1
|
||||
|
||||
# Extra flags to enable multi-threading
|
||||
EXTRA_FLAGS += -DWEBP_USE_THREAD
|
||||
@ -103,7 +113,7 @@ endif
|
||||
|
||||
AR = ar
|
||||
ARFLAGS = r
|
||||
CPPFLAGS = -Isrc/ -Wall
|
||||
CPPFLAGS = -I. -Isrc/ -Wall
|
||||
CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
|
||||
CC = gcc
|
||||
INSTALL = install
|
||||
@ -173,9 +183,6 @@ DSP_DEC_OBJS = \
|
||||
src/dsp/yuv_sse2.o \
|
||||
|
||||
DSP_ENC_OBJS = \
|
||||
src/dsp/argb.o \
|
||||
src/dsp/argb_mips_dsp_r2.o \
|
||||
src/dsp/argb_sse2.o \
|
||||
src/dsp/cost.o \
|
||||
src/dsp/cost_mips32.o \
|
||||
src/dsp/cost_mips_dsp_r2.o \
|
||||
@ -335,7 +342,8 @@ OUT_LIBS += src/libwebp.a
|
||||
EXTRA_LIB = extras/libwebpextras.a
|
||||
OUT_EXAMPLES = examples/cwebp examples/dwebp
|
||||
EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux \
|
||||
examples/anim_diff examples/img2webp examples/webpinfo
|
||||
examples/anim_diff examples/anim_dump \
|
||||
examples/img2webp examples/webpinfo
|
||||
OTHER_EXAMPLES = extras/get_disto extras/webp_quality extras/vwebp_sdl
|
||||
|
||||
OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
|
||||
@ -363,7 +371,7 @@ src/utils/bit_reader_utils.o: src/utils/endian_inl_utils.h
|
||||
src/utils/bit_writer_utils.o: src/utils/endian_inl_utils.h
|
||||
|
||||
%.o: %.c $(HDRS)
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
|
||||
|
||||
examples/libanim_util.a: $(ANIM_UTIL_OBJS)
|
||||
examples/libexample_util.a: $(EX_UTIL_OBJS)
|
||||
@ -381,6 +389,7 @@ src/demux/libwebpdemux.a: $(LIBWEBPDEMUX_OBJS)
|
||||
$(AR) $(ARFLAGS) $@ $^
|
||||
|
||||
examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
|
||||
examples/anim_dump: examples/anim_dump.o $(ANIM_UTIL_OBJS)
|
||||
examples/cwebp: examples/cwebp.o
|
||||
examples/dwebp: examples/dwebp.o
|
||||
examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
|
||||
@ -394,6 +403,13 @@ examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
|
||||
examples/anim_diff: imageio/libimageio_util.a src/libwebp.a
|
||||
examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
|
||||
examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
|
||||
examples/anim_dump: examples/libanim_util.a
|
||||
examples/anim_dump: src/demux/libwebpdemux.a
|
||||
examples/anim_dump: examples/libexample_util.a
|
||||
examples/anim_dump: imageio/libimageio_util.a
|
||||
examples/anim_dump: imageio/libimageenc.a
|
||||
examples/anim_dump: src/libwebp.a
|
||||
examples/anim_dump: EXTRA_LIBS += $(GIF_LIBS) $(DWEBP_LIBS)
|
||||
examples/cwebp: examples/libexample_util.a
|
||||
examples/cwebp: imageio/libimagedec.a
|
||||
examples/cwebp: imageio/libimageio_util.a
|
||||
@ -434,8 +450,8 @@ extras/vwebp_sdl: extras/vwebp_sdl.o
|
||||
extras/vwebp_sdl: extras/webp_to_sdl.o
|
||||
extras/vwebp_sdl: imageio/libimageio_util.a
|
||||
extras/vwebp_sdl: src/libwebp.a
|
||||
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL
|
||||
extras/vwebp_sdl: EXTRA_LIBS += -lSDL
|
||||
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL $(SDL_FLAGS)
|
||||
extras/vwebp_sdl: EXTRA_LIBS += $(SDL_LIBS)
|
||||
|
||||
$(OUT_EXAMPLES) $(EXTRA_EXAMPLES) $(OTHER_EXAMPLES):
|
||||
$(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH GIF2WEBP 1 "January 25, 2017"
|
||||
.TH GIF2WEBP 1 "September 20, 2017"
|
||||
.SH NAME
|
||||
gif2webp \- Convert a GIF image to WebP
|
||||
.SH SYNOPSIS
|
||||
@ -109,6 +109,9 @@ the range of 20 to 50.
|
||||
.TP
|
||||
.B \-mt
|
||||
Use multi-threading for encoding, if possible.
|
||||
.TP
.B \-loop_compatibility
|
||||
If enabled, handle the loop information in a compatible fashion for Chrome
|
||||
versions up to and including M62, and for Firefox.
|
||||
.TP
|
||||
.B \-v
|
||||
Print extra information.
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH WEBPINFO 1 "May 08, 2017"
|
||||
.TH WEBPINFO 1 "November 24, 2017"
|
||||
.SH NAME
|
||||
webpinfo \- print out the chunk level structure of WebP files
|
||||
along with basic integrity checks.
|
||||
@ -22,16 +22,19 @@ WebP format.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B -quiet
|
||||
.B \-version
|
||||
Print the version number (as major.minor.revision) and exit.
|
||||
.TP
|
||||
.B \-quiet
|
||||
Do not show chunk parsing information.
|
||||
.TP
|
||||
.B -diag
|
||||
.B \-diag
|
||||
Show parsing error diagnosis.
|
||||
.TP
|
||||
.B -summary
|
||||
.B \-summary
|
||||
Show chunk stats summary.
|
||||
.TP
|
||||
.BI -bitstream_info
|
||||
.BI \-bitstream_info
|
||||
Parse bitstream header.
|
||||
.TP
|
||||
.B \-h, \-help
|
||||
|
@ -22,6 +22,7 @@ commondir = $(includedir)/webp
|
||||
libwebp_la_SOURCES =
|
||||
libwebpinclude_HEADERS =
|
||||
libwebpinclude_HEADERS += webp/encode.h
|
||||
|
||||
noinst_HEADERS =
|
||||
noinst_HEADERS += webp/format_constants.h
|
||||
|
||||
@ -35,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
|
||||
# other than the ones listed on the command line, i.e., after linking, it will
|
||||
# not have unresolved symbols. Some platforms (Windows among them) require all
|
||||
# symbols in shared libraries to be resolved at library creation.
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 7:0:0
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 7:1:0
|
||||
libwebpincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebp.pc
|
||||
|
||||
@ -47,7 +48,7 @@ if BUILD_LIBWEBPDECODER
|
||||
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
|
||||
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
|
||||
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:0:0
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:1:0
|
||||
pkgconfig_DATA += libwebpdecoder.pc
|
||||
endif
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
noinst_LTLIBRARIES = libwebpdecode.la
|
||||
|
||||
libwebpdecode_la_SOURCES =
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../utils/quant_levels_dec_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/format_constants.h"
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/utils/quant_levels_dec_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// ALPHDecoder object.
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#ifndef WEBP_DEC_ALPHAI_H_
|
||||
#define WEBP_DEC_ALPHAI_H_
|
||||
#ifndef WEBP_DEC_ALPHAI_DEC_H_
|
||||
#define WEBP_DEC_ALPHAI_DEC_H_
|
||||
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/filters_utils.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/filters_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_ALPHAI_H_ */
|
||||
#endif /* WEBP_DEC_ALPHAI_DEC_H_ */
|
||||
|
@ -13,15 +13,15 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecBuffer
|
||||
|
||||
// Number of bytes per pixel for the different color-spaces.
|
||||
static const int kModeBpp[MODE_LAST] = {
|
||||
static const uint8_t kModeBpp[MODE_LAST] = {
|
||||
3, 4, 3, 4, 4, 2, 2,
|
||||
4, 4, 4, 2, // pre-multiplied modes
|
||||
1, 1 };
|
||||
@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
|
||||
// strictly speaking, the very last (or first, if flipped) row
|
||||
// doesn't require padding.
|
||||
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
|
||||
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
|
||||
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
|
||||
|
||||
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
int ok = 1;
|
||||
@ -98,9 +98,14 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
|
||||
uint64_t uv_size = 0, a_size = 0, total_size;
|
||||
// We need memory and it hasn't been allocated yet.
|
||||
// => initialize output buffer, now that dimensions are known.
|
||||
const int stride = w * kModeBpp[mode];
|
||||
const uint64_t size = (uint64_t)stride * h;
|
||||
int stride;
|
||||
uint64_t size;
|
||||
|
||||
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
stride = w * kModeBpp[mode];
|
||||
size = (uint64_t)stride * h;
|
||||
if (!WebPIsRGBMode(mode)) {
|
||||
uv_stride = (w + 1) / 2;
|
||||
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
|
||||
@ -169,11 +174,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
|
||||
return VP8_STATUS_OK;
|
||||
}
|
||||
|
||||
VP8StatusCode WebPAllocateDecBuffer(int w, int h,
|
||||
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
|
||||
const WebPDecoderOptions* const options,
|
||||
WebPDecBuffer* const out) {
|
||||
WebPDecBuffer* const buffer) {
|
||||
VP8StatusCode status;
|
||||
if (out == NULL || w <= 0 || h <= 0) {
|
||||
if (buffer == NULL || width <= 0 || height <= 0) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
if (options != NULL) { // First, apply options if there is any.
|
||||
@ -182,33 +187,39 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
|
||||
const int ch = options->crop_height;
|
||||
const int x = options->crop_left & ~1;
|
||||
const int y = options->crop_top & ~1;
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
|
||||
x + cw > width || y + ch > height) {
|
||||
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
|
||||
}
|
||||
w = cw;
|
||||
h = ch;
|
||||
width = cw;
|
||||
height = ch;
|
||||
}
|
||||
|
||||
if (options->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
int scaled_width = options->scaled_width;
|
||||
int scaled_height = options->scaled_height;
|
||||
if (!WebPRescalerGetScaledDimensions(
|
||||
w, h, &scaled_width, &scaled_height)) {
|
||||
width, height, &scaled_width, &scaled_height)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
w = scaled_width;
|
||||
h = scaled_height;
|
||||
width = scaled_width;
|
||||
height = scaled_height;
|
||||
#else
|
||||
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
|
||||
#endif
|
||||
}
|
||||
}
|
||||
out->width = w;
|
||||
out->height = h;
|
||||
buffer->width = width;
|
||||
buffer->height = height;
|
||||
|
||||
// Then, allocate buffer for real.
|
||||
status = AllocateBuffer(out);
|
||||
status = AllocateBuffer(buffer);
|
||||
if (status != VP8_STATUS_OK) return status;
|
||||
|
||||
// Use the stride trick if vertical flip is needed.
|
||||
if (options != NULL && options->flip) {
|
||||
status = WebPFlipBuffer(out);
|
||||
status = WebPFlipBuffer(buffer);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
@ -11,8 +11,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_COMMON_H_
|
||||
#define WEBP_DEC_COMMON_H_
|
||||
#ifndef WEBP_DEC_COMMON_DEC_H_
|
||||
#define WEBP_DEC_COMMON_DEC_H_
|
||||
|
||||
// intra prediction modes
|
||||
enum { B_DC_PRED = 0, // 4x4 modes
|
||||
@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
||||
NUM_PROBAS = 11
|
||||
};
|
||||
|
||||
#endif // WEBP_DEC_COMMON_H_
|
||||
#endif // WEBP_DEC_COMMON_DEC_H_
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main reconstruction function.
|
||||
|
||||
static const int kScan[16] = {
|
||||
static const uint16_t kScan[16] = {
|
||||
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
|
||||
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
|
||||
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
|
||||
@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||
#define MIN_DITHER_AMP 4
|
||||
|
||||
#define DITHER_AMP_TAB_SIZE 12
|
||||
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
// roughly, it's dqm->uv_mat_[1]
|
||||
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
|
||||
};
|
||||
@ -728,7 +728,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
}
|
||||
|
||||
mem = (uint8_t*)dec->mem_;
|
||||
dec->intra_t_ = (uint8_t*)mem;
|
||||
dec->intra_t_ = mem;
|
||||
mem += intra_pred_mode_size;
|
||||
|
||||
dec->yuv_t_ = (VP8TopSamples*)mem;
|
||||
@ -750,7 +750,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
|
||||
mem = (uint8_t*)WEBP_ALIGN(mem);
|
||||
assert((yuv_size & WEBP_ALIGN_CST) == 0);
|
||||
dec->yuv_b_ = (uint8_t*)mem;
|
||||
dec->yuv_b_ = mem;
|
||||
mem += yuv_size;
|
||||
|
||||
dec->mb_data_ = (VP8MBData*)mem;
|
||||
@ -766,7 +766,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
const int extra_rows = kFilterExtraRows[dec->filter_type_];
|
||||
const int extra_y = extra_rows * dec->cache_y_stride_;
|
||||
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
|
||||
dec->cache_y_ = ((uint8_t*)mem) + extra_y;
|
||||
dec->cache_y_ = mem + extra_y;
|
||||
dec->cache_u_ = dec->cache_y_
|
||||
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
|
||||
dec->cache_v_ = dec->cache_u_
|
||||
@ -776,7 +776,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
mem += cache_size;
|
||||
|
||||
// alpha plane
|
||||
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
|
||||
dec->alpha_plane_ = alpha_size ? mem : NULL;
|
||||
mem += alpha_size;
|
||||
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
|
||||
|
||||
|
@ -15,10 +15,10 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
// In append mode, buffer allocations increase as multiples of this value.
|
||||
// Needs to be a power of 2.
|
||||
@ -673,12 +673,12 @@ void WebPIDelete(WebPIDecoder* idec) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Wrapper toward WebPINewDecoder
|
||||
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
|
||||
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
|
||||
size_t output_buffer_size, int output_stride) {
|
||||
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
|
||||
WebPIDecoder* idec;
|
||||
|
||||
if (mode >= MODE_YUV) return NULL;
|
||||
if (csp >= MODE_YUV) return NULL;
|
||||
if (is_external_memory == 0) { // Overwrite parameters to sane values.
|
||||
output_buffer_size = 0;
|
||||
output_stride = 0;
|
||||
@ -689,7 +689,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
|
||||
}
|
||||
idec = WebPINewDecoder(NULL);
|
||||
if (idec == NULL) return NULL;
|
||||
idec->output_.colorspace = mode;
|
||||
idec->output_.colorspace = csp;
|
||||
idec->output_.is_external_memory = is_external_memory;
|
||||
idec->output_.u.RGBA.rgba = output_buffer;
|
||||
idec->output_.u.RGBA.stride = output_stride;
|
||||
|
@ -13,11 +13,11 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/yuv.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main YUV<->RGB conversion functions
|
||||
@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
int num_rows;
|
||||
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@ -241,6 +241,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
//------------------------------------------------------------------------------
|
||||
// YUV rescaling (no final RGB conversion needed)
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int Rescale(const uint8_t* src, int src_stride,
|
||||
int new_lines, WebPRescaler* const wrk) {
|
||||
int num_lines_out = 0;
|
||||
@ -431,7 +432,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
|
||||
int max_lines_out) {
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
uint8_t* alpha_dst = base_rgba + 1;
|
||||
@ -541,6 +542,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Default custom functions
|
||||
|
||||
@ -561,10 +564,14 @@ static int CustomSetup(VP8Io* io) {
|
||||
WebPInitUpsamplers();
|
||||
}
|
||||
if (io->use_scaling) {
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
|
||||
if (!ok) {
|
||||
return 0; // memory error
|
||||
}
|
||||
#else
|
||||
return 0; // rescaling support not compiled
|
||||
#endif
|
||||
} else {
|
||||
if (is_rgb) {
|
||||
WebPInitSamplers();
|
||||
@ -598,9 +605,6 @@ static int CustomSetup(VP8Io* io) {
|
||||
}
|
||||
}
|
||||
|
||||
if (is_rgb) {
|
||||
VP8YUVInit();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
|
||||
static WEBP_INLINE int clip(int v, int M) {
|
||||
return v < 0 ? 0 : v > M ? M : v;
|
||||
|
@ -11,15 +11,19 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
#define USE_GENERIC_TREE 0
|
||||
#endif
|
||||
#endif // USE_GENERIC_TREE
|
||||
|
||||
#ifdef USE_GENERIC_TREE
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
static const int8_t kYModesIntra4[18] = {
|
||||
-B_DC_PRED, 1,
|
||||
-B_TM_PRED, 2,
|
||||
@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
int x;
|
||||
for (x = 0; x < 4; ++x) {
|
||||
const uint8_t* const prob = kBModesProba[top[x]][ymode];
|
||||
#ifdef USE_GENERIC_TREE
|
||||
#if (USE_GENERIC_TREE == 1)
|
||||
// Generic tree-parsing
|
||||
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
|
||||
while (i > 0) {
|
||||
@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br,
|
||||
(!VP8GetBit(br, prob[6]) ? B_LD_PRED :
|
||||
(!VP8GetBit(br, prob[7]) ? B_VL_PRED :
|
||||
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
|
||||
#endif // USE_GENERIC_TREE
|
||||
#endif // USE_GENERIC_TREE
|
||||
top[x] = ymode;
|
||||
}
|
||||
memcpy(modes, top, 4 * sizeof(*top));
|
||||
@ -498,7 +502,7 @@ static const uint8_t
|
||||
|
||||
// Paragraph 9.9
|
||||
|
||||
static const int kBands[16 + 1] = {
|
||||
static const uint8_t kBands[16 + 1] = {
|
||||
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
0 // extra entry as sentinel
|
||||
};
|
||||
|
@ -13,12 +13,12 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/bit_reader_inl_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -11,10 +11,10 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_WEBP_DECODE_VP8_H_
|
||||
#define WEBP_WEBP_DECODE_VP8_H_
|
||||
#ifndef WEBP_DEC_VP8_DEC_H_
|
||||
#define WEBP_DEC_VP8_DEC_H_
|
||||
|
||||
#include "../webp/decode.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
|
||||
// Miscellaneous VP8/VP8L bitstream probing functions.
|
||||
|
||||
// Returns true if the next 3 bytes in data contain the VP8 signature.
|
||||
WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
|
||||
|
||||
// Validates the VP8 data-header and retrieves basic header information viz
|
||||
// width and height. Returns 0 in case of formatting error. *width/*height
|
||||
// can be passed NULL.
|
||||
WEBP_EXTERN(int) VP8GetInfo(
|
||||
WEBP_EXTERN int VP8GetInfo(
|
||||
const uint8_t* data,
|
||||
size_t data_size, // data available so far
|
||||
size_t chunk_size, // total data size expected in the chunk
|
||||
int* const width, int* const height);
|
||||
|
||||
// Returns true if the next byte(s) in data is a VP8L signature.
|
||||
WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
|
||||
|
||||
// Validates the VP8L data-header and retrieves basic header information viz
|
||||
// width, height and alpha. Returns 0 in case of formatting error.
|
||||
// width/height/has_alpha can be passed NULL.
|
||||
WEBP_EXTERN(int) VP8LGetInfo(
|
||||
WEBP_EXTERN int VP8LGetInfo(
|
||||
const uint8_t* data, size_t data_size, // data available so far
|
||||
int* const width, int* const height, int* const has_alpha);
|
||||
|
||||
@ -182,4 +182,4 @@ WEBP_EXTERN(int) VP8LGetInfo(
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_WEBP_DECODE_VP8_H_ */
|
||||
#endif /* WEBP_DEC_VP8_DEC_H_ */
|
||||
|
@ -11,16 +11,16 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8I_H_
|
||||
#define WEBP_DEC_VP8I_H_
|
||||
#ifndef WEBP_DEC_VP8I_DEC_H_
|
||||
#define WEBP_DEC_VP8I_DEC_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "./common_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/random_utils.h"
|
||||
#include "../utils/thread_utils.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "src/dec/common_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/bit_reader_utils.h"
|
||||
#include "src/utils/random_utils.h"
|
||||
#include "src/utils/thread_utils.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -32,7 +32,7 @@ extern "C" {
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 0
|
||||
#define DEC_MIN_VERSION 6
|
||||
#define DEC_REV_VERSION 0
|
||||
#define DEC_REV_VERSION 1
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||
@ -57,7 +57,6 @@ extern "C" {
|
||||
// '|' = left sample, '-' = top sample, '+' = top-left sample
|
||||
// 't' = extra top-right sample for 4x4 modes
|
||||
#define YUV_SIZE (BPS * 17 + BPS * 9)
|
||||
#define Y_SIZE (BPS * 17)
|
||||
#define Y_OFF (BPS * 1 + 8)
|
||||
#define U_OFF (Y_OFF + BPS * 16 + BPS)
|
||||
#define V_OFF (U_OFF + 16)
|
||||
@ -317,4 +316,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8I_H_ */
|
||||
#endif /* WEBP_DEC_VP8I_DEC_H_ */
|
||||
|
@ -14,22 +14,22 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./alphai_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "../dsp/dsp.h"
|
||||
#include "../dsp/lossless.h"
|
||||
#include "../dsp/lossless_common.h"
|
||||
#include "../dsp/yuv.h"
|
||||
#include "../utils/endian_inl_utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/alphai_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
#define NUM_ARGB_CACHE_ROWS 16
|
||||
|
||||
static const int kCodeLengthLiterals = 16;
|
||||
static const int kCodeLengthRepeatCode = 16;
|
||||
static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
|
||||
static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Five Huffman codes are used at each meta code:
|
||||
@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
|
||||
// All values computed for 8-bit first level lookup with Mark Adler's tool:
|
||||
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
|
||||
#define FIXED_TABLE_SIZE (630 * 3 + 410)
|
||||
static const int kTableSize[12] = {
|
||||
static const uint16_t kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 654,
|
||||
FIXED_TABLE_SIZE + 656,
|
||||
FIXED_TABLE_SIZE + 658,
|
||||
@ -485,6 +485,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
//------------------------------------------------------------------------------
|
||||
// Scaling.
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
const int num_channels = 4;
|
||||
const int in_width = io->mb_w;
|
||||
@ -516,10 +517,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
out_width, out_height, 0, num_channels, work);
|
||||
return 1;
|
||||
}
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Export to ARGB
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
// We have special "export" function since we need to convert from BGRA
|
||||
static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
|
||||
int rgba_stride, uint8_t* const rgba) {
|
||||
@ -561,6 +565,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
|
||||
return num_lines_out;
|
||||
}
|
||||
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
|
||||
// Emit rows without any scaling.
|
||||
static int EmitRows(WEBP_CSP_MODE colorspace,
|
||||
const uint8_t* row_in, int in_stride,
|
||||
@ -746,9 +752,12 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
|
||||
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
|
||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
|
||||
const int num_rows_out = io->use_scaling ?
|
||||
const int num_rows_out =
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
io->use_scaling ?
|
||||
EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
|
||||
rgba, buf->stride) :
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
EmitRows(output->colorspace, rows_data, in_stride,
|
||||
io->mb_w, io->mb_h, rgba, buf->stride);
|
||||
// Update 'last_out_row_'.
|
||||
@ -1632,12 +1641,19 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
|
||||
if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
|
||||
|
||||
if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
|
||||
// need the alpha-multiply functions for premultiplied output or rescaling
|
||||
WebPInitAlphaProcessing();
|
||||
}
|
||||
#else
|
||||
if (io->use_scaling) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
goto Err;
|
||||
}
|
||||
#endif
|
||||
if (!WebPIsRGBMode(dec->output_->colorspace)) {
|
||||
WebPInitConvertARGBToYUV();
|
||||
if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
|
||||
|
@ -12,14 +12,14 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
// Vikas Arora(vikaas.arora@gmail.com)
|
||||
|
||||
#ifndef WEBP_DEC_VP8LI_H_
|
||||
#define WEBP_DEC_VP8LI_H_
|
||||
#ifndef WEBP_DEC_VP8LI_DEC_H_
|
||||
#define WEBP_DEC_VP8LI_DEC_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/bit_reader_utils.h"
|
||||
#include "../utils/color_cache_utils.h"
|
||||
#include "../utils/huffman_utils.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/bit_reader_utils.h"
|
||||
#include "src/utils/color_cache_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_VP8LI_H_ */
|
||||
#endif /* WEBP_DEC_VP8LI_DEC_H_ */
|
||||
|
@ -13,11 +13,11 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vp8i_dec.h"
|
||||
#include "./vp8li_dec.h"
|
||||
#include "./webpi_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/mux_types.h" // ALPHA_FLAG
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/mux_types.h" // ALPHA_FLAG
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RIFF layout is:
|
||||
@ -421,7 +421,9 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
|
||||
NULL, NULL, NULL, &has_animation,
|
||||
NULL, headers);
|
||||
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
|
||||
// TODO(jzern): full support of animation frames will require API additions.
|
||||
// The WebPDemux API + libwebp can be used to decode individual
|
||||
// uncomposited frames or the WebPAnimDecoder can be used to fully
|
||||
// reconstruct them (see webp/demux.h).
|
||||
if (has_animation) {
|
||||
status = VP8_STATUS_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
|
||||
#ifndef WEBP_DEC_WEBPI_H_
|
||||
#define WEBP_DEC_WEBPI_H_
|
||||
#ifndef WEBP_DEC_WEBPI_DEC_H_
|
||||
#define WEBP_DEC_WEBPI_DEC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "../utils/rescaler_utils.h"
|
||||
#include "./vp8_dec.h"
|
||||
#include "src/utils/rescaler_utils.h"
|
||||
#include "src/dec/vp8_dec.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// WebPDecParams: Decoding output parameters. Transient internal object.
|
||||
@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* WEBP_DEC_WEBPI_H_ */
|
||||
#endif /* WEBP_DEC_WEBPI_DEC_H_ */
|
||||
|
@ -1,3 +1,4 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
lib_LTLIBRARIES = libwebpdemux.la
|
||||
|
||||
libwebpdemux_la_SOURCES =
|
||||
@ -9,6 +10,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
|
||||
libwebpdemuxinclude_HEADERS += ../webp/types.h
|
||||
|
||||
libwebpdemux_la_LIBADD = ../libwebp.la
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:2:0
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:3:0
|
||||
libwebpdemuxincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebpdemux.pc
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/decode.h"
|
||||
#include "../webp/demux.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h"
|
||||
#include "src/webp/demux.h"
|
||||
|
||||
#define NUM_CHANNELS 4
|
||||
|
||||
|
@ -11,21 +11,21 @@
|
||||
//
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../utils/utils.h"
|
||||
#include "../webp/decode.h" // WebPGetFeatures
|
||||
#include "../webp/demux.h"
|
||||
#include "../webp/format_constants.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h" // WebPGetFeatures
|
||||
#include "src/webp/demux.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 0
|
||||
#define DMUX_MIN_VERSION 3
|
||||
#define DMUX_REV_VERSION 2
|
||||
#define DMUX_REV_VERSION 3
|
||||
|
||||
typedef struct {
|
||||
size_t start_; // start location of the data
|
||||
@ -205,12 +205,14 @@ static void SetFrameInfo(size_t start_offset, size_t size,
|
||||
frame->complete_ = complete;
|
||||
}
|
||||
|
||||
// Store image bearing chunks to 'frame'.
|
||||
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
|
||||
// requirement, it may be zero.
|
||||
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
|
||||
MemBuffer* const mem, Frame* const frame) {
|
||||
int alpha_chunks = 0;
|
||||
int image_chunks = 0;
|
||||
int done = (MemDataSize(mem) < min_size);
|
||||
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
|
||||
MemDataSize(mem) < min_size);
|
||||
ParseStatus status = PARSE_OK;
|
||||
|
||||
if (done) return PARSE_NEED_MORE_DATA;
|
||||
@ -401,9 +403,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
|
||||
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
|
||||
if (frame == NULL) return PARSE_ERROR;
|
||||
|
||||
// For the single image case we allow parsing of a partial frame, but we need
|
||||
// at least CHUNK_HEADER_SIZE for parsing.
|
||||
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
|
||||
// For the single image case we allow parsing of a partial frame, so no
|
||||
// minimum size is imposed here.
|
||||
status = StoreFrame(1, 0, &dmux->mem_, frame);
|
||||
if (status != PARSE_ERROR) {
|
||||
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
|
||||
// Clear any alpha when the alpha flag is missing.
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 0,3,0,2
|
||||
PRODUCTVERSION 0,3,0,2
|
||||
FILEVERSION 0,3,0,3
|
||||
PRODUCTVERSION 0,3,0,3
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libwebpdemux DLL"
|
||||
VALUE "FileVersion", "0.3.2"
|
||||
VALUE "FileVersion", "0.3.3"
|
||||
VALUE "InternalName", "libwebpdemux.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2017"
|
||||
VALUE "OriginalFilename", "libwebpdemux.dll"
|
||||
VALUE "ProductName", "WebP Image Demuxer"
|
||||
VALUE "ProductVersion", "0.3.2"
|
||||
VALUE "ProductVersion", "0.3.3"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -1,9 +1,15 @@
|
||||
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_neon.la libwebpdspdecode_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_msa.la libwebpdspdecode_msa.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_wasm.la
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
noinst_LTLIBRARIES =
|
||||
noinst_LTLIBRARIES += libwebpdsp.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_msa.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_msa.la
|
||||
|
||||
if BUILD_LIBWEBPDECODER
|
||||
noinst_LTLIBRARIES += libwebpdspdecode.la
|
||||
@ -40,8 +46,6 @@ COMMON_SOURCES += yuv_mips32.c
|
||||
COMMON_SOURCES += yuv_mips_dsp_r2.c
|
||||
|
||||
ENC_SOURCES =
|
||||
ENC_SOURCES += argb.c
|
||||
ENC_SOURCES += argb_mips_dsp_r2.c
|
||||
ENC_SOURCES += cost.c
|
||||
ENC_SOURCES += cost_mips32.c
|
||||
ENC_SOURCES += cost_mips_dsp_r2.c
|
||||
@ -97,12 +101,7 @@ libwebpdspdecode_msa_la_SOURCES += upsampling_msa.c
|
||||
libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_msa_la_CPPFLAGS)
|
||||
libwebpdspdecode_msa_la_CFLAGS = $(libwebpdsp_msa_la_CFLAGS)
|
||||
|
||||
# WASM is not fully integrated into configure; the addition here keeps source
|
||||
# extraction by cmake simple.
|
||||
libwebpdspdecode_wasm_la_SOURCES = dec_wasm.c
|
||||
|
||||
libwebpdsp_sse2_la_SOURCES =
|
||||
libwebpdsp_sse2_la_SOURCES += argb_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += cost_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += enc_sse2.c
|
||||
libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
|
||||
@ -143,7 +142,8 @@ libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
|
||||
libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
|
||||
libwebpdsp_la_LDFLAGS = -lm
|
||||
libwebpdsp_la_LIBADD =
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_msa.la
|
||||
|
@ -12,10 +12,13 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
// Tables can be faster on some platform but incur some extra binary size (~2k).
|
||||
// #define USE_TABLES_FOR_ALPHA_MULT
|
||||
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
|
||||
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
|
||||
return v;
|
||||
}
|
||||
|
||||
#ifdef USE_TABLES_FOR_ALPHA_MULT
|
||||
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
|
||||
|
||||
static const uint32_t kMultTables[2][256] = {
|
||||
{ // (255u << MFIX) / alpha
|
||||
@ -132,9 +135,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
|
||||
return inverse ? (255u << MFIX) / a : a * KINV_255;
|
||||
}
|
||||
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
#endif // USE_TABLES_FOR_ALPHA_MULT
|
||||
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t argb = ptr[x];
|
||||
@ -154,8 +157,8 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
const uint32_t a = alpha[x];
|
||||
@ -217,8 +220,9 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
|
||||
#endif
|
||||
|
||||
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
while (h-- > 0) {
|
||||
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
|
||||
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
|
||||
@ -235,6 +239,7 @@ static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
|
||||
rgba += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
@ -254,9 +259,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
|
||||
return (x * m) >> 16;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride,
|
||||
int rg_byte_pos /* 0 or 1 */) {
|
||||
while (h-- > 0) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
@ -275,15 +280,16 @@ static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
|
||||
}
|
||||
#undef MULTIPLIER
|
||||
|
||||
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
|
||||
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
|
||||
int w, int h, int stride) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
|
||||
#else
|
||||
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
|
||||
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
@ -338,6 +344,36 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int HasAlpha8b_C(const uint8_t* src, int length) {
|
||||
while (length-- > 0) if (*src++ != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_C(const uint8_t* src, int length) {
|
||||
int x;
|
||||
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple channel manipulations.
|
||||
|
||||
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
||||
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
|
||||
}
|
||||
|
||||
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int i, offset = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
|
||||
offset += step;
|
||||
}
|
||||
}
|
||||
|
||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||
@ -345,6 +381,11 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
|
||||
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init function
|
||||
@ -360,15 +401,21 @@ static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPMultARGBRow = WebPMultARGBRowC;
|
||||
WebPMultRow = WebPMultRowC;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
||||
WebPMultARGBRow = WebPMultARGBRow_C;
|
||||
WebPMultRow = WebPMultRow_C;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
|
||||
|
||||
WebPPackRGB = PackRGB_C;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
|
||||
WebPDispatchAlpha = DispatchAlpha_C;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
|
||||
WebPExtractAlpha = ExtractAlpha_C;
|
||||
WebPExtractGreen = ExtractGreen_C;
|
||||
#endif
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_C;
|
||||
WebPHasAlpha32b = HasAlpha32b_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -382,16 +429,31 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
WebPInitAlphaProcessingMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPMultARGBRow != NULL);
|
||||
assert(WebPMultRow != NULL);
|
||||
assert(WebPApplyAlphaMultiply != NULL);
|
||||
assert(WebPApplyAlphaMultiply4444 != NULL);
|
||||
assert(WebPDispatchAlpha != NULL);
|
||||
assert(WebPDispatchAlphaToGreen != NULL);
|
||||
assert(WebPExtractAlpha != NULL);
|
||||
assert(WebPExtractGreen != NULL);
|
||||
assert(WebPPackRGB != NULL);
|
||||
assert(WebPHasAlpha8b != NULL);
|
||||
assert(WebPHasAlpha32b != NULL);
|
||||
|
||||
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
|
||||
// Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffffffff;
|
||||
int i, j, temp0;
|
||||
|
||||
@ -79,7 +79,8 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_mask != 0xff);
|
||||
}
|
||||
|
||||
static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
|
||||
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
|
||||
int inverse) {
|
||||
int x;
|
||||
const uint32_t c_00ffffff = 0x00ffffffu;
|
||||
const uint32_t c_ff000000 = 0xff000000u;
|
||||
@ -124,14 +125,54 @@ static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, int step,
|
||||
uint32_t* out) {
|
||||
int temp0, temp1, temp2, offset;
|
||||
const int rest = len & 1;
|
||||
const int a = 0xff;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
__asm__ volatile (
|
||||
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[offset]"=&r"(offset), [out]"+&r"(out)
|
||||
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void WebPInitAlphaProcessingMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPMultARGBRow = MultARGBRow;
|
||||
WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
|
||||
WebPMultARGBRow = MultARGBRow_MIPSdspR2;
|
||||
WebPPackRGB = PackRGB_MIPSdspR2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include "./neon.h"
|
||||
#include "src/dsp/neon.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
@ -11,16 +11,16 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -72,9 +72,9 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_and != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
@ -98,9 +98,9 @@ static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -210,6 +210,61 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
|
||||
#undef MULTIPLIER
|
||||
#undef PREMULTIPLY
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Alpha detection
|
||||
|
||||
static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
for (; i + 16 <= length; i += 16) {
|
||||
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i < length; ++i) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i alpha_mask = _mm_set1_epi32(0xff);
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
int i = 0;
|
||||
// We don't know if we can access the last 3 bytes after the last alpha
|
||||
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
|
||||
// or the last byte of the quadruplet). Hence the '-3' protection below.
|
||||
length = length * 4 - 3; // size in bytes
|
||||
for (; i + 64 <= length; i += 64) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i b2 = _mm_and_si128(a2, alpha_mask);
|
||||
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
|
||||
const __m128i c0 = _mm_packs_epi32(b0, b1);
|
||||
const __m128i c1 = _mm_packs_epi32(b2, b3);
|
||||
const __m128i d = _mm_packus_epi16(c0, c1);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i + 32 <= length; i += 32) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i c = _mm_packs_epi32(b0, b1);
|
||||
const __m128i d = _mm_packus_epi16(c, c);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Apply alpha value to rows
|
||||
|
||||
@ -238,7 +293,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
|
||||
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
|
||||
}
|
||||
|
||||
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
@ -261,7 +316,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
}
|
||||
}
|
||||
width -= x;
|
||||
if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
|
||||
if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -273,9 +328,12 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
|
||||
WebPMultARGBRow = MultARGBRow_SSE2;
|
||||
WebPMultRow = MultRow_SSE2;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
WebPDispatchAlpha = DispatchAlpha_SSE2;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE2;
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_SSE2;
|
||||
WebPHasAlpha32b = HasAlpha32b_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
@ -19,9 +19,9 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
@ -82,7 +82,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
@ -1,68 +0,0 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions.
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
|
||||
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
||||
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
|
||||
}
|
||||
|
||||
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int i, offset = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
|
||||
offset += step;
|
||||
}
|
||||
}
|
||||
|
||||
void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||
const uint8_t*, int, uint32_t*);
|
||||
void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||
int, int, uint32_t*);
|
||||
|
||||
extern void VP8EncDspARGBInitMIPSdspR2(void);
|
||||
extern void VP8EncDspARGBInitSSE2(void);
|
||||
|
||||
static volatile VP8CPUInfo argb_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&argb_last_cpuinfo_used;
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
|
||||
if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
VP8PackARGB = PackARGB;
|
||||
VP8PackRGB = PackRGB;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8EncDspARGBInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8EncDspARGBInitMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
argb_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
@ -1,110 +0,0 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions (mips version).
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
int temp0, temp1, temp2, temp3, offset;
|
||||
const int rest = len & 1;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
const int step = 4;
|
||||
__asm__ volatile (
|
||||
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
|
||||
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
int temp0, temp1, temp2, offset;
|
||||
const int rest = len & 1;
|
||||
const int a = 0xff;
|
||||
const uint32_t* const loop_end = out + len - rest;
|
||||
__asm__ volatile (
|
||||
"xor %[offset], %[offset], %[offset] \n\t"
|
||||
"beq %[loop_end], %[out], 0f \n\t"
|
||||
"2: \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"addiu %[out], %[out], 4 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], -4(%[out]) \n\t"
|
||||
"addu %[offset], %[offset], %[step] \n\t"
|
||||
"bne %[loop_end], %[out], 2b \n\t"
|
||||
"0: \n\t"
|
||||
"beq %[rest], $zero, 1f \n\t"
|
||||
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"sw %[temp0], 0(%[out]) \n\t"
|
||||
"1: \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[offset]"=&r"(offset), [out]"+&r"(out)
|
||||
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8EncDspARGBInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
|
||||
VP8PackARGB = PackARGB;
|
||||
VP8PackRGB = PackRGB;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
|
||||
|
||||
#endif // WEBP_USE_MIPS_DSP_R2
|
@ -1,53 +0,0 @@
|
||||
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// ARGB making functions (SSE2 version).
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "./lossless.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
(void)a;
|
||||
if (g == r + 1) { // RGBA input order. Need to swap R and B.
|
||||
assert(b == r + 2);
|
||||
assert(a == r + 3);
|
||||
VP8LConvertBGRAToRGBA((const uint32_t*)r, len, (uint8_t*)out);
|
||||
} else {
|
||||
assert(g == b + 1);
|
||||
assert(r == b + 2);
|
||||
assert(a == b + 3);
|
||||
memcpy(out, b, len * 4);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8EncDspARGBInitSSE2(void);
|
||||
extern void VP8LDspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
|
||||
VP8LDspInitSSE2();
|
||||
VP8PackARGB = PackARGB;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
@ -9,8 +9,8 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Boolean-cost cost table
|
||||
@ -319,7 +319,7 @@ const uint8_t VP8EncBands[16 + 1] = {
|
||||
//------------------------------------------------------------------------------
|
||||
// Mode costs
|
||||
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
int n = res->first;
|
||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||
const int p0 = res->prob[n][ctx0][0];
|
||||
@ -354,8 +354,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
@ -384,8 +384,8 @@ static volatile VP8CPUInfo cost_last_cpuinfo_used =
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
|
||||
if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs;
|
||||
VP8GetResidualCost = GetResidualCost_C;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
|
@ -9,13 +9,13 @@
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
int temp0, temp1;
|
||||
int v_reg, ctx_reg;
|
||||
int n = res->first;
|
||||
@ -96,8 +96,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
@ -143,8 +143,8 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||
extern void VP8EncDspCostInitMIPS32(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs;
|
||||
VP8GetResidualCost = GetResidualCost_MIPS32;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS32
|
||||
|
@ -9,13 +9,13 @@
|
||||
//
|
||||
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
|
||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
|
||||
int temp0, temp1;
|
||||
int v_reg, ctx_reg;
|
||||
int n = res->first;
|
||||
@ -97,7 +97,7 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
extern void VP8EncDspCostInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
|
||||
VP8GetResidualCost = GetResidualCost;
|
||||
VP8GetResidualCost = GetResidualCost_MIPSdspR2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,19 +11,19 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
@ -42,7 +42,7 @@ static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||
res->coeffs = coeffs;
|
||||
}
|
||||
|
||||
static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
||||
static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
|
||||
uint8_t levels[16], ctxs[16];
|
||||
uint16_t abs_levels[16];
|
||||
int n = res->first;
|
||||
@ -108,8 +108,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
||||
extern void VP8EncDspCostInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
|
||||
VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
|
||||
VP8GetResidualCost = GetResidualCostSSE2;
|
||||
VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
|
||||
VP8GetResidualCost = GetResidualCost_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_HAVE_NEON_RTCD)
|
||||
#include <stdio.h>
|
||||
@ -23,13 +23,11 @@
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// x86/x86-64 micro-arch detection.
|
||||
// SSE2 detection.
|
||||
//
|
||||
|
||||
// skip x86 specific code for WASM builds
|
||||
#if defined(WEBP_USE_WASM)
|
||||
// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
|
||||
#elif (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
|
||||
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
|
||||
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
|
||||
__asm__ volatile (
|
||||
"mov %%ebx, %%edi\n"
|
||||
@ -65,10 +63,8 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
|
||||
#define GetCPUInfo __cpuid
|
||||
#endif
|
||||
|
||||
// skip xgetbv definition for WASM builds
|
||||
#if defined(WEBP_USE_WASM)
|
||||
// NaCl has no support for xgetbv or the raw opcode.
|
||||
#elif !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
|
||||
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
|
||||
static WEBP_INLINE uint64_t xgetbv(void) {
|
||||
const uint32_t ecx = 0;
|
||||
uint32_t eax, edx;
|
||||
@ -98,19 +94,7 @@ static WEBP_INLINE uint64_t xgetbv(void) {
|
||||
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Platform specific VP8CPUInfo functions.
|
||||
//
|
||||
|
||||
// WASM needs to precede platform specific architecture checks as the defines
|
||||
// will still be present when building this target.
|
||||
#if defined(WEBP_USE_WASM)
|
||||
static int wasmCPUInfo(CPUFeature feature) {
|
||||
if (feature != kWASM) return 0;
|
||||
return 1;
|
||||
}
|
||||
VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
|
||||
#elif defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
|
||||
|
||||
// helper function for run-time detection of slow SSSE3 platforms
|
||||
static int CheckSlowModel(int info) {
|
||||
@ -159,7 +143,7 @@ static int x86CPUInfo(CPUFeature feature) {
|
||||
return !!(cpu_info[2] & (1 << 0));
|
||||
}
|
||||
if (feature == kSlowSSSE3) {
|
||||
if (is_intel && (cpu_info[2] & (1 << 0))) { // SSSE3?
|
||||
if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
|
||||
return CheckSlowModel(cpu_info[0]);
|
||||
}
|
||||
return 0;
|
||||
|
397
src/dsp/dec.c
397
src/dsp/dec.c
@ -11,9 +11,11 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include <assert.h>
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -25,7 +27,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
#define STORE(x, y, v) \
|
||||
dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
|
||||
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
#define STORE2(y, dc, d, c) do { \
|
||||
const int DC = (dc); \
|
||||
@ -38,7 +40,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
|
||||
#define MUL2(a) (((a) * 35468) >> 16)
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -78,7 +81,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = MUL2(in[4]);
|
||||
const int d4 = MUL1(in[4]);
|
||||
@ -93,19 +96,21 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
#undef MUL2
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -114,8 +119,9 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -127,7 +133,8 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Paragraph 14.3
|
||||
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
out += 64;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
#define DST(x, y) dst[(x) + (y) * BPS]
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t* const clip0 = VP8kclip1 - top[-1];
|
||||
@ -174,21 +183,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// 16x16
|
||||
|
||||
static void VE16(uint8_t* dst) { // vertical
|
||||
static void VE16_C(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 16);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_C(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
memset(dst, dst[-1], 16);
|
||||
@ -203,7 +212,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
static void DC16_C(uint8_t* dst) { // DC
|
||||
int DC = 16;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -212,7 +221,7 @@ static void DC16(uint8_t* dst) { // DC
|
||||
Put16(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -221,7 +230,7 @@ static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
|
||||
int DC = 8;
|
||||
int i;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
@ -230,9 +239,10 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
Put16(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
|
||||
@ -242,7 +252,8 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE4_C(uint8_t* dst) { // vertical
|
||||
const uint8_t* top = dst - BPS;
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
@ -255,8 +266,9 @@ static void VE4(uint8_t* dst) { // vertical
|
||||
memcpy(dst + i * BPS, vals, sizeof(vals));
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HE4(uint8_t* dst) { // horizontal
|
||||
static void HE4_C(uint8_t* dst) { // horizontal
|
||||
const int A = dst[-1 - BPS];
|
||||
const int B = dst[-1];
|
||||
const int C = dst[-1 + BPS];
|
||||
@ -268,7 +280,8 @@ static void HE4(uint8_t* dst) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst) { // DC
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void DC4_C(uint8_t* dst) { // DC
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
|
||||
@ -276,7 +289,7 @@ static void DC4(uint8_t* dst) { // DC
|
||||
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
static void RD4_C(uint8_t* dst) { // Down-right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -295,7 +308,7 @@ static void RD4(uint8_t* dst) { // Down-right
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
static void LD4_C(uint8_t* dst) { // Down-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@ -312,8 +325,9 @@ static void LD4(uint8_t* dst) { // Down-Left
|
||||
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4_C(uint8_t* dst) { // Vertical-Right
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -335,7 +349,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4_C(uint8_t* dst) { // Vertical-Left
|
||||
const int A = dst[0 - BPS];
|
||||
const int B = dst[1 - BPS];
|
||||
const int C = dst[2 - BPS];
|
||||
@ -357,7 +371,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst) { // Horizontal-Up
|
||||
static void HU4_C(uint8_t* dst) { // Horizontal-Up
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -372,7 +386,7 @@ static void HU4(uint8_t* dst) { // Horizontal-Up
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst) { // Horizontal-Down
|
||||
static void HD4_C(uint8_t* dst) { // Horizontal-Down
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
const int K = dst[-1 + 2 * BPS];
|
||||
@ -404,14 +418,15 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VE8uv_C(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memcpy(dst + j * BPS, dst - BPS, 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
static void HE8uv_C(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memset(dst, dst[-1], 8);
|
||||
@ -427,7 +442,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
static void DC8uv_C(uint8_t* dst) { // DC
|
||||
int dc0 = 8;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -436,7 +451,7 @@ static void DC8uv(uint8_t* dst) { // DC
|
||||
Put8x8uv(dc0 >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -445,7 +460,7 @@ static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -454,17 +469,19 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Edge filtering functions
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
// 4 pixels in, 2 pixels out
|
||||
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
|
||||
@ -474,7 +491,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 4 pixels in, 4 pixels out
|
||||
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
const int a = 3 * (q0 - p0);
|
||||
const int a1 = VP8ksclip2[(a + 4) >> 3];
|
||||
@ -487,7 +504,7 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
|
||||
}
|
||||
|
||||
// 6 pixels in, 6 pixels out
|
||||
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
|
||||
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
|
||||
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
|
||||
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
|
||||
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
|
||||
@ -503,18 +520,22 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
|
||||
p[ 2*step] = VP8kclip1[q2 - a3];
|
||||
}
|
||||
|
||||
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
|
||||
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
|
||||
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
|
||||
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
|
||||
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static WEBP_INLINE int needs_filter2(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
|
||||
int step, int t, int it) {
|
||||
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
|
||||
const int p0 = p[-step], q0 = p[0];
|
||||
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
|
||||
@ -523,140 +544,159 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
|
||||
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
|
||||
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (needs_filter(p + i, stride, thresh2)) {
|
||||
do_filter2(p + i, stride);
|
||||
if (NeedsFilter_C(p + i, stride, thresh2)) {
|
||||
DoFilter2_C(p + i, stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
|
||||
int i;
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
for (i = 0; i < 16; ++i) {
|
||||
if (needs_filter(p + i * stride, 1, thresh2)) {
|
||||
do_filter2(p + i * stride, 1);
|
||||
if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
|
||||
DoFilter2_C(p + i * stride, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
SimpleVFilter16_C(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
SimpleHFilter16_C(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
static WEBP_INLINE void FilterLoop26(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
} else {
|
||||
do_filter6(p, hstride);
|
||||
DoFilter6_C(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void FilterLoop24(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
|
||||
int hstride, int vstride, int size,
|
||||
int thresh, int ithresh,
|
||||
int hev_thresh) {
|
||||
const int thresh2 = 2 * thresh + 1;
|
||||
while (size-- > 0) {
|
||||
if (needs_filter2(p, hstride, thresh2, ithresh)) {
|
||||
if (hev(p, hstride, hev_thresh)) {
|
||||
do_filter2(p, hstride);
|
||||
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
|
||||
if (Hev(p, hstride, hev_thresh)) {
|
||||
DoFilter2_C(p, hstride);
|
||||
} else {
|
||||
do_filter4(p, hstride);
|
||||
DoFilter4_C(p, hstride);
|
||||
}
|
||||
}
|
||||
p += vstride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// on macroblock edges
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
static void VFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
static void HFilter16_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter16i_C(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -700,7 +740,6 @@ extern void VP8DspInitNEON(void);
|
||||
extern void VP8DspInitMIPS32(void);
|
||||
extern void VP8DspInitMIPSdspR2(void);
|
||||
extern void VP8DspInitMSA(void);
|
||||
extern void VP8DspInitWASM(void);
|
||||
|
||||
static volatile VP8CPUInfo dec_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&dec_last_cpuinfo_used;
|
||||
@ -710,54 +749,66 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
|
||||
VP8InitClipTables();
|
||||
|
||||
VP8TransformWHT = TransformWHT;
|
||||
VP8Transform = TransformTwo;
|
||||
VP8TransformUV = TransformUV;
|
||||
VP8TransformDC = TransformDC;
|
||||
VP8TransformDCUV = TransformDCUV;
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8TransformWHT = TransformWHT_C;
|
||||
VP8Transform = TransformTwo_C;
|
||||
VP8TransformDC = TransformDC_C;
|
||||
VP8TransformAC3 = TransformAC3_C;
|
||||
#endif
|
||||
VP8TransformUV = TransformUV_C;
|
||||
VP8TransformDCUV = TransformDCUV_C;
|
||||
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8VFilter16 = VFilter16_C;
|
||||
VP8VFilter16i = VFilter16i_C;
|
||||
VP8HFilter16 = HFilter16_C;
|
||||
VP8VFilter8 = VFilter8_C;
|
||||
VP8VFilter8i = VFilter8i_C;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_C;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_C;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_C;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_C;
|
||||
#endif
|
||||
|
||||
VP8PredLuma4[0] = DC4;
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[3] = HE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
VP8PredLuma4[8] = HD4;
|
||||
VP8PredLuma4[9] = HU4;
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8HFilter16i = HFilter16i_C;
|
||||
VP8HFilter8 = HFilter8_C;
|
||||
VP8HFilter8i = HFilter8i_C;
|
||||
#endif
|
||||
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma4[0] = DC4_C;
|
||||
VP8PredLuma4[1] = TM4_C;
|
||||
VP8PredLuma4[2] = VE4_C;
|
||||
VP8PredLuma4[4] = RD4_C;
|
||||
VP8PredLuma4[6] = LD4_C;
|
||||
#endif
|
||||
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
VP8PredLuma4[3] = HE4_C;
|
||||
VP8PredLuma4[5] = VR4_C;
|
||||
VP8PredLuma4[7] = VL4_C;
|
||||
VP8PredLuma4[8] = HD4_C;
|
||||
VP8PredLuma4[9] = HU4_C;
|
||||
|
||||
VP8DitherCombine8x8 = DitherCombine8x8;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8PredLuma16[0] = DC16_C;
|
||||
VP8PredLuma16[1] = TM16_C;
|
||||
VP8PredLuma16[2] = VE16_C;
|
||||
VP8PredLuma16[3] = HE16_C;
|
||||
VP8PredLuma16[4] = DC16NoTop_C;
|
||||
VP8PredLuma16[5] = DC16NoLeft_C;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_C;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv_C;
|
||||
VP8PredChroma8[1] = TM8uv_C;
|
||||
VP8PredChroma8[2] = VE8uv_C;
|
||||
VP8PredChroma8[3] = HE8uv_C;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_C;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_C;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_C;
|
||||
#endif
|
||||
|
||||
VP8DitherCombine8x8 = DitherCombine8x8_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -771,11 +822,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
VP8DspInitMIPS32();
|
||||
@ -790,12 +836,59 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
||||
if (VP8GetCPUInfo(kMSA)) {
|
||||
VP8DspInitMSA();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_WASM)
|
||||
if (VP8GetCPUInfo(kWASM)) {
|
||||
VP8DspInitWASM();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8DspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8TransformWHT != NULL);
|
||||
assert(VP8Transform != NULL);
|
||||
assert(VP8TransformDC != NULL);
|
||||
assert(VP8TransformAC3 != NULL);
|
||||
assert(VP8TransformUV != NULL);
|
||||
assert(VP8TransformDCUV != NULL);
|
||||
assert(VP8VFilter16 != NULL);
|
||||
assert(VP8HFilter16 != NULL);
|
||||
assert(VP8VFilter8 != NULL);
|
||||
assert(VP8HFilter8 != NULL);
|
||||
assert(VP8VFilter16i != NULL);
|
||||
assert(VP8HFilter16i != NULL);
|
||||
assert(VP8VFilter8i != NULL);
|
||||
assert(VP8HFilter8i != NULL);
|
||||
assert(VP8SimpleVFilter16 != NULL);
|
||||
assert(VP8SimpleHFilter16 != NULL);
|
||||
assert(VP8SimpleVFilter16i != NULL);
|
||||
assert(VP8SimpleHFilter16i != NULL);
|
||||
assert(VP8PredLuma4[0] != NULL);
|
||||
assert(VP8PredLuma4[1] != NULL);
|
||||
assert(VP8PredLuma4[2] != NULL);
|
||||
assert(VP8PredLuma4[3] != NULL);
|
||||
assert(VP8PredLuma4[4] != NULL);
|
||||
assert(VP8PredLuma4[5] != NULL);
|
||||
assert(VP8PredLuma4[6] != NULL);
|
||||
assert(VP8PredLuma4[7] != NULL);
|
||||
assert(VP8PredLuma4[8] != NULL);
|
||||
assert(VP8PredLuma4[9] != NULL);
|
||||
assert(VP8PredLuma16[0] != NULL);
|
||||
assert(VP8PredLuma16[1] != NULL);
|
||||
assert(VP8PredLuma16[2] != NULL);
|
||||
assert(VP8PredLuma16[3] != NULL);
|
||||
assert(VP8PredLuma16[4] != NULL);
|
||||
assert(VP8PredLuma16[5] != NULL);
|
||||
assert(VP8PredLuma16[6] != NULL);
|
||||
assert(VP8PredChroma8[0] != NULL);
|
||||
assert(VP8PredChroma8[1] != NULL);
|
||||
assert(VP8PredChroma8[2] != NULL);
|
||||
assert(VP8PredChroma8[3] != NULL);
|
||||
assert(VP8PredChroma8[4] != NULL);
|
||||
assert(VP8PredChroma8[5] != NULL);
|
||||
assert(VP8PredChroma8[6] != NULL);
|
||||
assert(VP8DitherCombine8x8 != NULL);
|
||||
|
||||
dec_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -11,11 +11,14 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#define USE_STATIC_TABLES // undefine to have run-time table initialization
|
||||
// define to 0 to have run-time table initialization
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_TABLES
|
||||
#if (USE_STATIC_TABLES == 1)
|
||||
|
||||
static const uint8_t abs0[255 + 255 + 1] = {
|
||||
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
|
||||
// and make sure it's set to true _last_ (so as to be thread-safe)
|
||||
static volatile int tables_ok = 0;
|
||||
|
||||
#endif
|
||||
#endif // USE_STATIC_TABLES
|
||||
|
||||
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
|
||||
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
|
||||
@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
|
||||
const uint8_t* const VP8kabs0 = &abs0[255];
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
|
||||
#if !defined(USE_STATIC_TABLES)
|
||||
#if (USE_STATIC_TABLES == 0)
|
||||
int i;
|
||||
if (!tables_ok) {
|
||||
for (i = -255; i <= 255; ++i) {
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "./mips_macro.h"
|
||||
#include "src/dsp/mips_macro.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "./mips_macro.h"
|
||||
#include "src/dsp/mips_macro.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include "./msa_macro.h"
|
||||
#include "src/dsp/msa_macro.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,23 +12,25 @@
|
||||
// Author: somnath@google.com (Somnath Banerjee)
|
||||
// cduvivier@google.com (Christian Duvivier)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
|
||||
// one it seems => disable it by default. Uncomment the following to enable:
|
||||
// #define USE_TRANSFORM_AC3
|
||||
#if !defined(USE_TRANSFORM_AC3)
|
||||
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "./common_sse2.h"
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
#define MUL(a, b) (((a) * (b)) >> 16)
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
@ -248,7 +250,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
_mm_subs_epu8((p), (q)))
|
||||
|
||||
// Shift each byte of "x" by 3 bits while preserving by the sign bit.
|
||||
static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
|
||||
const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
|
||||
@ -258,8 +260,8 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT2(a, b) { \
|
||||
a = _mm_xor_si128(a, sign_bit); \
|
||||
b = _mm_xor_si128(b, sign_bit); \
|
||||
(a) = _mm_xor_si128(a, sign_bit); \
|
||||
(b) = _mm_xor_si128(b, sign_bit); \
|
||||
}
|
||||
|
||||
#define FLIP_SIGN_BIT4(a, b, c, d) { \
|
||||
@ -268,11 +270,11 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
|
||||
}
|
||||
|
||||
// input/output is uint8_t
|
||||
static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int hev_thresh, __m128i* const not_hev) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i t_1 = MM_ABS(*p1, *p0);
|
||||
const __m128i t_2 = MM_ABS(*q1, *q0);
|
||||
@ -285,11 +287,11 @@ static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input pixels are int8_t
|
||||
static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
__m128i* const delta) {
|
||||
// beware of addition order, for saturation!
|
||||
const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1); // p1 - q1
|
||||
const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0); // q0 - p0
|
||||
@ -300,15 +302,16 @@ static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
|
||||
}
|
||||
|
||||
// input and output are int8_t
|
||||
static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
|
||||
__m128i* const q0,
|
||||
const __m128i* const fl) {
|
||||
const __m128i k3 = _mm_set1_epi8(3);
|
||||
const __m128i k4 = _mm_set1_epi8(4);
|
||||
__m128i v3 = _mm_adds_epi8(*fl, k3);
|
||||
__m128i v4 = _mm_adds_epi8(*fl, k4);
|
||||
|
||||
SignedShift8b(&v4); // v4 >> 3
|
||||
SignedShift8b(&v3); // v3 >> 3
|
||||
SignedShift8b_SSE2(&v4); // v4 >> 3
|
||||
SignedShift8b_SSE2(&v3); // v3 >> 3
|
||||
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
|
||||
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
|
||||
}
|
||||
@ -317,9 +320,9 @@ static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
|
||||
// Update operations:
|
||||
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
|
||||
// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
|
||||
static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
|
||||
const __m128i* const a0_lo,
|
||||
const __m128i* const a0_hi) {
|
||||
const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
|
||||
const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
|
||||
const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
|
||||
@ -330,11 +333,11 @@ static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
|
||||
}
|
||||
|
||||
// input pixels are uint8_t
|
||||
static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, __m128i* const mask) {
|
||||
const __m128i m_thresh = _mm_set1_epi8(thresh);
|
||||
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
|
||||
const __m128i kFE = _mm_set1_epi8(0xFE);
|
||||
@ -353,28 +356,29 @@ static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
|
||||
// Edge filtering functions
|
||||
|
||||
// Applies filter on 2 pixels (p0 and q0)
|
||||
static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
int thresh) {
|
||||
__m128i a, mask;
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta)
|
||||
// convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
|
||||
const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
|
||||
const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
|
||||
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &mask);
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
|
||||
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
GetBaseDelta(&p1s, p0, q0, &q1s, &a);
|
||||
GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
|
||||
a = _mm_and_si128(a, mask); // mask filter values we don't care about
|
||||
DoSimpleFilter(p0, q0, &a);
|
||||
DoSimpleFilter_SSE2(p0, q0, &a);
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
}
|
||||
|
||||
// Applies filter on 4 pixels (p1, p0, q0 and q1)
|
||||
static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
const __m128i k64 = _mm_set1_epi8(64);
|
||||
@ -384,7 +388,7 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
__m128i t1, t2, t3;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
|
||||
// convert to signed values
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
@ -399,8 +403,8 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
|
||||
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
|
||||
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
|
||||
SignedShift8b(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
|
||||
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
|
||||
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
|
||||
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
|
||||
FLIP_SIGN_BIT2(*p0, *q0);
|
||||
@ -417,25 +421,26 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
|
||||
}
|
||||
|
||||
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
|
||||
static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask, int hev_thresh) {
|
||||
static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
|
||||
__m128i* const p0, __m128i* const q0,
|
||||
__m128i* const q1, __m128i* const q2,
|
||||
const __m128i* const mask,
|
||||
int hev_thresh) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i sign_bit = _mm_set1_epi8(0x80);
|
||||
__m128i a, not_hev;
|
||||
|
||||
// compute hev mask
|
||||
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
|
||||
|
||||
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
|
||||
FLIP_SIGN_BIT2(*p2, *q2);
|
||||
GetBaseDelta(p1, p0, q0, q1, &a);
|
||||
GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
|
||||
|
||||
{ // do simple filter on pixels with hev
|
||||
const __m128i m = _mm_andnot_si128(not_hev, *mask);
|
||||
const __m128i f = _mm_and_si128(a, m);
|
||||
DoSimpleFilter(p0, q0, &f);
|
||||
DoSimpleFilter_SSE2(p0, q0, &f);
|
||||
}
|
||||
|
||||
{ // do strong filter on pixels with not hev
|
||||
@ -460,15 +465,15 @@ static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
|
||||
const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo); // Filter * 27 + 63
|
||||
const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi); // Filter * 27 + 63
|
||||
|
||||
Update2Pixels(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels(p0, q0, &a0_lo, &a0_hi);
|
||||
Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
|
||||
Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
|
||||
Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
|
||||
}
|
||||
}
|
||||
|
||||
// reads 8 rows across a vertical edge.
|
||||
static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
|
||||
__m128i* const p, __m128i* const q) {
|
||||
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
|
||||
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
|
||||
const __m128i A0 = _mm_set_epi32(
|
||||
@ -494,11 +499,11 @@ static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
|
||||
*q = _mm_unpackhi_epi32(C0, C1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
|
||||
const uint8_t* const r8,
|
||||
int stride,
|
||||
__m128i* const p1, __m128i* const p0,
|
||||
__m128i* const q0, __m128i* const q1) {
|
||||
// Assume the pixels around the edge (|) are numbered as follows
|
||||
// 00 01 | 02 03
|
||||
// 10 11 | 12 13
|
||||
@ -514,8 +519,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
// q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
|
||||
// p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
|
||||
// q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
|
||||
Load8x4(r0, stride, p1, q0);
|
||||
Load8x4(r8, stride, p0, q1);
|
||||
Load8x4_SSE2(r0, stride, p1, q0);
|
||||
Load8x4_SSE2(r8, stride, p0, q1);
|
||||
|
||||
{
|
||||
// p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
|
||||
@ -531,7 +536,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
|
||||
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
|
||||
uint8_t* dst, int stride) {
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i, dst += stride) {
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
|
||||
@ -540,12 +546,12 @@ static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
|
||||
}
|
||||
|
||||
// Transpose back and store
|
||||
static WEBP_INLINE void Store16x4(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
uint8_t* r0, uint8_t* r8,
|
||||
int stride) {
|
||||
__m128i t1, p1_s, p0_s, q0_s, q1_s;
|
||||
|
||||
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
|
||||
@ -572,55 +578,55 @@ static WEBP_INLINE void Store16x4(const __m128i* const p1,
|
||||
p1_s = _mm_unpacklo_epi16(t1, q1_s);
|
||||
q1_s = _mm_unpackhi_epi16(t1, q1_s);
|
||||
|
||||
Store4x4(&p0_s, r0, stride);
|
||||
Store4x4_SSE2(&p0_s, r0, stride);
|
||||
r0 += 4 * stride;
|
||||
Store4x4(&q0_s, r0, stride);
|
||||
Store4x4_SSE2(&q0_s, r0, stride);
|
||||
|
||||
Store4x4(&p1_s, r8, stride);
|
||||
Store4x4_SSE2(&p1_s, r8, stride);
|
||||
r8 += 4 * stride;
|
||||
Store4x4(&q1_s, r8, stride);
|
||||
Store4x4_SSE2(&q1_s, r8, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple In-loop filtering (Paragraph 15.2)
|
||||
|
||||
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
// Load
|
||||
__m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
|
||||
__m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
|
||||
__m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
|
||||
__m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
|
||||
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-stride], p0);
|
||||
_mm_storeu_si128((__m128i*)&p[0], q0);
|
||||
}
|
||||
|
||||
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
__m128i p1, p0, q0, q1;
|
||||
|
||||
p -= 2; // beginning of p1
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
|
||||
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4 * stride;
|
||||
SimpleVFilter16(p, stride, thresh);
|
||||
SimpleVFilter16_SSE2(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
|
||||
int k;
|
||||
for (k = 3; k > 0; --k) {
|
||||
p += 4;
|
||||
SimpleHFilter16(p, stride, thresh);
|
||||
SimpleHFilter16_SSE2(p, stride, thresh);
|
||||
}
|
||||
}
|
||||
|
||||
@ -628,60 +634,60 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
// Complex In-loop filtering (Paragraph 15.3)
|
||||
|
||||
#define MAX_DIFF1(p3, p2, p1, p0, m) do { \
|
||||
m = MM_ABS(p1, p0); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
(m) = MM_ABS(p1, p0); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define MAX_DIFF2(p3, p2, p1, p0, m) do { \
|
||||
m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
|
||||
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
||||
} while (0)
|
||||
|
||||
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) { \
|
||||
e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]); \
|
||||
e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]); \
|
||||
e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]); \
|
||||
e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]); \
|
||||
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
|
||||
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
|
||||
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
|
||||
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
|
||||
}
|
||||
|
||||
#define LOADUV_H_EDGE(p, u, v, stride) do { \
|
||||
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
|
||||
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
|
||||
p = _mm_unpacklo_epi64(U, V); \
|
||||
(p) = _mm_unpacklo_epi64(U, V); \
|
||||
} while (0)
|
||||
|
||||
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) { \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * stride); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * stride); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * stride); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * stride); \
|
||||
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
|
||||
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
|
||||
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
|
||||
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
|
||||
}
|
||||
|
||||
#define STOREUV(p, u, v, stride) { \
|
||||
_mm_storel_epi64((__m128i*)&u[(stride)], p); \
|
||||
p = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&v[(stride)], p); \
|
||||
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
|
||||
(p) = _mm_srli_si128(p, 8); \
|
||||
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ComplexMask(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
|
||||
const __m128i* const p0,
|
||||
const __m128i* const q0,
|
||||
const __m128i* const q1,
|
||||
int thresh, int ithresh,
|
||||
__m128i* const mask) {
|
||||
const __m128i it = _mm_set1_epi8(ithresh);
|
||||
const __m128i diff = _mm_subs_epu8(*mask, it);
|
||||
const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
|
||||
__m128i filter_mask;
|
||||
NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
|
||||
*mask = _mm_and_si128(thresh_mask, filter_mask);
|
||||
}
|
||||
|
||||
// on macroblock edges
|
||||
static void VFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i t1;
|
||||
__m128i mask;
|
||||
__m128i p2, p1, p0, q0, q1, q2;
|
||||
@ -694,8 +700,8 @@ static void VFilter16(uint8_t* p, int stride,
|
||||
LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
|
||||
@ -706,28 +712,28 @@ static void VFilter16(uint8_t* p, int stride,
|
||||
_mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
|
||||
}
|
||||
|
||||
static void HFilter16(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const b = p - 4;
|
||||
Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
|
||||
}
|
||||
|
||||
// on three inner edges
|
||||
static void VFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
@ -744,8 +750,8 @@ static void VFilter16i(uint8_t* p, int stride,
|
||||
|
||||
// p3 and p2 are not just temporary variables here: they will be
|
||||
// re-used for next span. And q2/q3 will become p1/p0 accordingly.
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
_mm_storeu_si128((__m128i*)&b[0 * stride], p1);
|
||||
@ -759,12 +765,12 @@ static void VFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter16i(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
int k;
|
||||
__m128i p3, p2, p1, p0; // loop invariants
|
||||
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
|
||||
|
||||
for (k = 3; k > 0; --k) {
|
||||
__m128i mask, tmp1, tmp2;
|
||||
@ -773,13 +779,13 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
p += 4; // beginning of q0 (and next span)
|
||||
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
|
||||
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
|
||||
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
|
||||
|
||||
// rotate samples
|
||||
p1 = tmp1;
|
||||
@ -788,8 +794,8 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@ -801,8 +807,8 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
|
||||
MAX_DIFF2(t1, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p2, u, v, -3 * stride);
|
||||
@ -813,28 +819,28 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
uint8_t* const tu = u - 4;
|
||||
uint8_t* const tv = v - 4;
|
||||
Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
|
||||
MAX_DIFF1(p3, p2, p1, p0, mask);
|
||||
|
||||
Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
|
||||
MAX_DIFF2(q3, q2, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
|
||||
|
||||
Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
||||
@ -849,8 +855,8 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
// Store
|
||||
STOREUV(p1, u, v, -2 * stride);
|
||||
@ -859,24 +865,24 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
|
||||
MAX_DIFF1(t2, t1, p1, p0, mask);
|
||||
|
||||
u += 4; // beginning of q0
|
||||
v += 4;
|
||||
Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
|
||||
MAX_DIFF2(t2, t1, q1, q0, mask);
|
||||
|
||||
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
|
||||
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
|
||||
|
||||
u -= 2; // beginning of p1
|
||||
v -= 2;
|
||||
Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -893,7 +899,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static void VE4(uint8_t* dst) { // vertical
|
||||
static void VE4_SSE2(uint8_t* dst) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -909,7 +915,7 @@ static void VE4(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst) { // Down-Left
|
||||
static void LD4_SSE2(uint8_t* dst) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -925,7 +931,7 @@ static void LD4(uint8_t* dst) { // Down-Left
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = dst[-1 + 0 * BPS];
|
||||
const int J = dst[-1 + 1 * BPS];
|
||||
@ -950,7 +956,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -975,7 +981,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst) { // Down-right
|
||||
static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
|
||||
const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
|
||||
@ -1004,7 +1010,7 @@ static void RD4(uint8_t* dst) { // Down-right
|
||||
//------------------------------------------------------------------------------
|
||||
// Luma 16x16
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
|
||||
const uint8_t* top = dst - BPS;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
@ -1041,11 +1047,11 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
|
||||
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
|
||||
static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
|
||||
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
|
||||
|
||||
static void VE16(uint8_t* dst) {
|
||||
static void VE16_SSE2(uint8_t* dst) {
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -1053,7 +1059,7 @@ static void VE16(uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
@ -1062,7 +1068,7 @@ static void HE16(uint8_t* dst) { // horizontal
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -1070,7 +1076,7 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16(uint8_t* dst) { // DC
|
||||
static void DC16_SSE2(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
@ -1083,37 +1089,37 @@ static void DC16(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 16;
|
||||
Put16(DC >> 5, dst);
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
|
||||
static void DC16NoTop_SSE2(uint8_t* dst) { // DC with top samples unavailable
|
||||
int DC = 8;
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
DC += dst[-1 + j * BPS];
|
||||
}
|
||||
Put16(DC >> 4, dst);
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
|
||||
static void DC16NoLeft_SSE2(uint8_t* dst) { // DC with left samples unavailable
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
|
||||
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
|
||||
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
|
||||
const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 8;
|
||||
Put16(DC >> 4, dst);
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
|
||||
Put16(0x80, dst);
|
||||
static void DC16NoTopLeft_SSE2(uint8_t* dst) { // DC with no top & left samples
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma
|
||||
|
||||
static void VE8uv(uint8_t* dst) { // vertical
|
||||
static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
int j;
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -1121,17 +1127,8 @@ static void VE8uv(uint8_t* dst) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE8uv(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
_mm_storel_epi64((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
}
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -1139,7 +1136,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uv(uint8_t* dst) { // DC
|
||||
static void DC8uv_SSE2(uint8_t* dst) { // DC
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
@ -1150,29 +1147,29 @@ static void DC8uv(uint8_t* dst) { // DC
|
||||
}
|
||||
{
|
||||
const int DC = _mm_cvtsi128_si32(sum) + left + 8;
|
||||
Put8x8uv(DC >> 4, dst);
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
|
||||
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
|
||||
const __m128i sum = _mm_sad_epu8(top, zero);
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 4;
|
||||
Put8x8uv(DC >> 3, dst);
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
|
||||
static void DC8uvNoTop_SSE2(uint8_t* dst) { // DC with no top samples
|
||||
int dc0 = 4;
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[-1 + i * BPS];
|
||||
}
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
Put8x8uv_SSE2(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv(0x80, dst);
|
||||
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -1181,47 +1178,46 @@ static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
|
||||
VP8Transform = Transform;
|
||||
#if defined(USE_TRANSFORM_AC3)
|
||||
VP8TransformAC3 = TransformAC3;
|
||||
VP8Transform = Transform_SSE2;
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
VP8TransformAC3 = TransformAC3_SSE2;
|
||||
#endif
|
||||
|
||||
VP8VFilter16 = VFilter16;
|
||||
VP8HFilter16 = HFilter16;
|
||||
VP8VFilter8 = VFilter8;
|
||||
VP8HFilter8 = HFilter8;
|
||||
VP8VFilter16i = VFilter16i;
|
||||
VP8HFilter16i = HFilter16i;
|
||||
VP8VFilter8i = VFilter8i;
|
||||
VP8HFilter8i = HFilter8i;
|
||||
VP8VFilter16 = VFilter16_SSE2;
|
||||
VP8HFilter16 = HFilter16_SSE2;
|
||||
VP8VFilter8 = VFilter8_SSE2;
|
||||
VP8HFilter8 = HFilter8_SSE2;
|
||||
VP8VFilter16i = VFilter16i_SSE2;
|
||||
VP8HFilter16i = HFilter16i_SSE2;
|
||||
VP8VFilter8i = VFilter8i_SSE2;
|
||||
VP8HFilter8i = HFilter8i_SSE2;
|
||||
|
||||
VP8SimpleVFilter16 = SimpleVFilter16;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i;
|
||||
VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
|
||||
VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
|
||||
VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
|
||||
VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
|
||||
|
||||
VP8PredLuma4[1] = TM4;
|
||||
VP8PredLuma4[2] = VE4;
|
||||
VP8PredLuma4[4] = RD4;
|
||||
VP8PredLuma4[5] = VR4;
|
||||
VP8PredLuma4[6] = LD4;
|
||||
VP8PredLuma4[7] = VL4;
|
||||
VP8PredLuma4[1] = TM4_SSE2;
|
||||
VP8PredLuma4[2] = VE4_SSE2;
|
||||
VP8PredLuma4[4] = RD4_SSE2;
|
||||
VP8PredLuma4[5] = VR4_SSE2;
|
||||
VP8PredLuma4[6] = LD4_SSE2;
|
||||
VP8PredLuma4[7] = VL4_SSE2;
|
||||
|
||||
VP8PredLuma16[0] = DC16;
|
||||
VP8PredLuma16[1] = TM16;
|
||||
VP8PredLuma16[2] = VE16;
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[4] = DC16NoTop;
|
||||
VP8PredLuma16[5] = DC16NoLeft;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft;
|
||||
VP8PredLuma16[0] = DC16_SSE2;
|
||||
VP8PredLuma16[1] = TM16_SSE2;
|
||||
VP8PredLuma16[2] = VE16_SSE2;
|
||||
VP8PredLuma16[3] = HE16_SSE2;
|
||||
VP8PredLuma16[4] = DC16NoTop_SSE2;
|
||||
VP8PredLuma16[5] = DC16NoLeft_SSE2;
|
||||
VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
|
||||
|
||||
VP8PredChroma8[0] = DC8uv;
|
||||
VP8PredChroma8[1] = TM8uv;
|
||||
VP8PredChroma8[2] = VE8uv;
|
||||
VP8PredChroma8[3] = HE8uv;
|
||||
VP8PredChroma8[4] = DC8uvNoTop;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||
VP8PredChroma8[0] = DC8uv_SSE2;
|
||||
VP8PredChroma8[1] = TM8uv_SSE2;
|
||||
VP8PredChroma8[2] = VE8uv_SSE2;
|
||||
VP8PredChroma8[4] = DC8uvNoTop_SSE2;
|
||||
VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
|
||||
VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include "../dec/vp8i_dec.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
static void HE16(uint8_t* dst) { // horizontal
|
||||
static void HE16_SSE41(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
const __m128i kShuffle3 = _mm_set1_epi8(3);
|
||||
for (j = 16; j > 0; --j) {
|
||||
@ -36,7 +36,7 @@ static void HE16(uint8_t* dst) { // horizontal
|
||||
extern void VP8DspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
|
||||
VP8PredLuma16[3] = HE16;
|
||||
VP8PredLuma16[3] = HE16_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
1618
src/dsp/dec_wasm.c
1618
src/dsp/dec_wasm.c
File diff suppressed because it is too large
Load Diff
@ -15,10 +15,10 @@
|
||||
#define WEBP_DSP_DSP_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "../webp/config.h"
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "../webp/types.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -51,9 +51,8 @@ extern "C" {
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
// For now, none of the optimizations below are available in emscripten.
|
||||
// WebAssembly overrides native optimizations.
|
||||
#if !(defined(EMSCRIPTEN) || defined(WEBP_USE_WASM))
|
||||
// for now, none of the optimizations below are available in emscripten
|
||||
#if !defined(EMSCRIPTEN)
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
@ -105,7 +104,7 @@ extern "C" {
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
@ -117,6 +116,22 @@ extern "C" {
|
||||
|
||||
#endif /* EMSCRIPTEN */
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
@ -146,6 +161,11 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
@ -156,12 +176,11 @@ typedef enum {
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA,
|
||||
kWASM
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
@ -289,6 +308,7 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
|
||||
int xo, int yo, // center position
|
||||
int W, int H); // plane dimension
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
// This version is called with the guarantee that you can load 8 bytes and
|
||||
// 8 rows at offset src1 and src2
|
||||
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
@ -296,10 +316,13 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
|
||||
|
||||
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
|
||||
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_DISABLE_STATS)
|
||||
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
|
||||
const uint8_t* src2, int len);
|
||||
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||
#endif
|
||||
|
||||
// must be called before using any of the above directly
|
||||
void VP8SSIMDspInit(void);
|
||||
@ -480,12 +503,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
@ -551,25 +574,22 @@ void WebPMultRows(uint8_t* ptr, int stride,
|
||||
int width, int num_rows, int inverse);
|
||||
|
||||
// Plain-C versions, used as fallback by some implementations.
|
||||
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// This function returns true if src[i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
// This function returns true if src[4*i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
|
||||
// To be called first before using the above.
|
||||
void WebPInitAlphaProcessing(void);
|
||||
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
|
||||
// To be called first before using the above.
|
||||
void VP8EncDspARGBInit(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Filter functions
|
||||
|
||||
|
158
src/dsp/enc.c
158
src/dsp/enc.c
@ -14,16 +14,18 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int clip_max(int v, int max) {
|
||||
return (v > max) ? max : v;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
@ -56,9 +58,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
histo->last_non_zero = last_non_zero;
|
||||
}
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -76,6 +79,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
}
|
||||
VP8SetHistogramData(distribution, histo);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// run-time tables (~4k)
|
||||
@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
@ -140,15 +146,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
int i;
|
||||
int tmp[16];
|
||||
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
|
||||
@ -176,13 +182,16 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
VP8FTransform(src, ref, out);
|
||||
VP8FTransform(src + 4, ref + 4, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
// input is 12b signed
|
||||
int32_t tmp[16];
|
||||
int i;
|
||||
@ -211,6 +220,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
out[12 + i] = b3 >> 1;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MUL
|
||||
#undef STORE
|
||||
@ -303,8 +313,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
@ -323,8 +333,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_C(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
@ -507,7 +517,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -523,6 +533,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
int w, int h) {
|
||||
int count = 0;
|
||||
@ -538,20 +549,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 16);
|
||||
}
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 8);
|
||||
}
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 8, 8);
|
||||
}
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 4, 4);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
int k, x, y;
|
||||
for (k = 0; k < 4; ++k) {
|
||||
uint32_t avg = 0;
|
||||
@ -571,6 +583,7 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// We try to match the spectral content (weighted) between source and
|
||||
// reconstructed samples.
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
@ -608,24 +621,25 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_C(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Quantization
|
||||
@ -636,8 +650,8 @@ static const uint8_t kZigzag[16] = {
|
||||
};
|
||||
|
||||
// Simple quantization
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int last = -1;
|
||||
int n;
|
||||
for (n = 0; n < 16; ++n) {
|
||||
@ -662,13 +676,15 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return (last >= 0);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Block copy
|
||||
@ -682,11 +698,11 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
}
|
||||
}
|
||||
|
||||
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 4, 4);
|
||||
}
|
||||
|
||||
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 16, 8);
|
||||
}
|
||||
|
||||
@ -734,26 +750,32 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
InitTables();
|
||||
|
||||
// default C implementations
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransform2 = FTransform2;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8Mean16x4 = Mean16x4;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock;
|
||||
VP8Copy4x4 = Copy4x4;
|
||||
VP8Copy16x8 = Copy16x8;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8ITransform = ITransform_C;
|
||||
VP8FTransform = FTransform_C;
|
||||
VP8FTransformWHT = FTransformWHT_C;
|
||||
VP8TDisto4x4 = Disto4x4_C;
|
||||
VP8TDisto16x16 = Disto16x16_C;
|
||||
VP8CollectHistogram = CollectHistogram_C;
|
||||
VP8SSE16x16 = SSE16x16_C;
|
||||
VP8SSE16x8 = SSE16x8_C;
|
||||
VP8SSE8x8 = SSE8x8_C;
|
||||
VP8SSE4x4 = SSE4x4_C;
|
||||
#endif
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8EncQuantizeBlock = QuantizeBlock_C;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_C;
|
||||
#endif
|
||||
|
||||
VP8FTransform2 = FTransform2_C;
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_C;
|
||||
VP8Mean16x4 = Mean16x4_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
VP8Copy4x4 = Copy4x4_C;
|
||||
VP8Copy16x8 = Copy16x8_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -772,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
VP8EncDspInitAVX2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8EncDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
VP8EncDspInitMIPS32();
|
||||
@ -793,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8EncDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8ITransform != NULL);
|
||||
assert(VP8FTransform != NULL);
|
||||
assert(VP8FTransformWHT != NULL);
|
||||
assert(VP8TDisto4x4 != NULL);
|
||||
assert(VP8TDisto16x16 != NULL);
|
||||
assert(VP8CollectHistogram != NULL);
|
||||
assert(VP8SSE16x16 != NULL);
|
||||
assert(VP8SSE16x8 != NULL);
|
||||
assert(VP8SSE8x8 != NULL);
|
||||
assert(VP8SSE4x4 != NULL);
|
||||
assert(VP8EncQuantizeBlock != NULL);
|
||||
assert(VP8EncQuantize2Blocks != NULL);
|
||||
assert(VP8FTransform2 != NULL);
|
||||
assert(VP8EncPredLuma4 != NULL);
|
||||
assert(VP8EncPredLuma16 != NULL);
|
||||
assert(VP8EncPredChroma8 != NULL);
|
||||
assert(VP8Mean16x4 != NULL);
|
||||
assert(VP8EncQuantizeBlockWHT != NULL);
|
||||
assert(VP8Copy4x4 != NULL);
|
||||
assert(VP8Copy16x8 != NULL);
|
||||
|
||||
enc_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
//
|
||||
// AVX2 version of speed-critical encoding functions.
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_AVX2)
|
||||
|
||||
|
@ -13,13 +13,13 @@
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
// Slobodan Prijic (slobodan.prijic@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
|
||||
#include "./mips_macro.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
@ -113,8 +113,9 @@ static const int kC2 = 35468;
|
||||
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
|
||||
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
|
||||
@ -144,11 +145,11 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne_MIPS32(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,8 +188,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
|
||||
"sh %[temp5], " #J "(%[ppin]) \n\t" \
|
||||
"sh %[level], " #N "(%[pout]) \n\t"
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int sign, coeff, level, i;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -238,11 +239,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -361,8 +362,8 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
"msub %[temp6], %[temp0] \n\t" \
|
||||
"msub %[temp7], %[temp1] \n\t"
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int tmp[32];
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
|
||||
@ -396,13 +397,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_MIPS32(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -478,7 +479,8 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
|
||||
int temp17, temp18, temp19, temp20;
|
||||
@ -539,7 +541,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
|
||||
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
|
||||
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -573,7 +575,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -599,7 +601,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -621,7 +623,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -651,17 +653,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
extern void VP8EncDspInitMIPS32(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8ITransform = ITransform_MIPS32;
|
||||
VP8FTransform = FTransform_MIPS32;
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MIPS32;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MIPS32;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_MIPS32;
|
||||
VP8TDisto16x16 = Disto16x16_MIPS32;
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8SSE16x16 = SSE16x16_MIPS32;
|
||||
VP8SSE8x8 = SSE8x8_MIPS32;
|
||||
VP8SSE16x8 = SSE16x8_MIPS32;
|
||||
VP8SSE4x4 = SSE4x4_MIPS32;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -12,13 +12,13 @@
|
||||
// Author(s): Darko Laus (darko.laus@imgtec.com)
|
||||
// Mirko Raus (mirko.raus@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "./mips_macro.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/mips_macro.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
static const int kC1 = 20091 + (1 << 16);
|
||||
static const int kC2 = 35468;
|
||||
@ -141,7 +141,8 @@ static const int kC2 = 35468;
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const int c2217 = 2217;
|
||||
const int c5352 = 5352;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
@ -238,16 +239,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
|
||||
|
||||
@ -313,13 +314,14 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
return abs(temp3 - temp17) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_MIPSdspR2(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -1011,8 +1013,8 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8(C8DC8 + dst, left, top);
|
||||
VerticalPred8(C8VE8 + dst, top);
|
||||
@ -1031,8 +1033,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16(I16DC16 + dst, left, top);
|
||||
VerticalPred16(I16VE16 + dst, top);
|
||||
HorizontalPred16(I16HE16 + dst, left);
|
||||
@ -1041,7 +1043,7 @@ static void Intra16Preds(uint8_t* dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -1077,7 +1079,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
GET_SSE_INNER(C) \
|
||||
GET_SSE_INNER(D)
|
||||
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1107,7 +1109,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1129,7 +1131,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1147,7 +1149,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1270,8 +1272,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
"usw $0, " #J "(%[ppin]) \n\t" \
|
||||
"3: \n\t"
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
|
||||
int sign, coeff, level;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -1311,11 +1313,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return (ret != 0);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -1358,7 +1360,7 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
@ -1450,9 +1452,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
"addiu %[temp8], %[temp8], 1 \n\t" \
|
||||
"sw %[temp8], 0(%[temp3]) \n\t"
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
|
||||
@ -1484,23 +1486,28 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
extern void VP8EncDspInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
|
||||
VP8FTransform = FTransform;
|
||||
VP8ITransform = ITransform;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8FTransform = FTransform_MIPSdspR2;
|
||||
VP8FTransformWHT = FTransformWHT_MIPSdspR2;
|
||||
VP8ITransform = ITransform_MIPSdspR2;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4_MIPSdspR2;
|
||||
VP8TDisto16x16 = Disto16x16_MIPSdspR2;
|
||||
|
||||
VP8EncPredLuma16 = Intra16Preds_MIPSdspR2;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_MIPSdspR2;
|
||||
VP8EncPredLuma4 = Intra4Preds_MIPSdspR2;
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8SSE16x16 = SSE16x16_MIPSdspR2;
|
||||
VP8SSE8x8 = SSE8x8_MIPSdspR2;
|
||||
VP8SSE16x8 = SSE16x8_MIPSdspR2;
|
||||
VP8SSE4x4 = SSE4x4_MIPSdspR2;
|
||||
#endif
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MIPSdspR2;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MIPSdspR2;
|
||||
|
||||
VP8CollectHistogram = CollectHistogram_MIPSdspR2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,13 +11,13 @@
|
||||
//
|
||||
// Author: Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./msa_macro.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/msa_macro.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms
|
||||
@ -69,15 +69,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -130,7 +131,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -167,7 +168,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -199,20 +200,20 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform_MSA(a, w);
|
||||
const int sum2 = TTransform_MSA(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_MSA(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -221,9 +222,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
//------------------------------------------------------------------------------
|
||||
// Histogram
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -430,7 +431,7 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -547,8 +548,8 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -669,8 +670,8 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -711,7 +712,7 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -738,7 +739,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -757,7 +758,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -777,7 +778,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -799,8 +800,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -852,8 +853,8 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return (sum > 0);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -866,26 +867,26 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitMSA(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMSA(void) {
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8ITransform = ITransform_MSA;
|
||||
VP8FTransform = FTransform_MSA;
|
||||
VP8FTransformWHT = FTransformWHT_MSA;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8TDisto4x4 = Disto4x4_MSA;
|
||||
VP8TDisto16x16 = Disto16x16_MSA;
|
||||
VP8CollectHistogram = CollectHistogram_MSA;
|
||||
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8EncPredLuma4 = Intra4Preds_MSA;
|
||||
VP8EncPredLuma16 = Intra16Preds_MSA;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_MSA;
|
||||
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8SSE16x16 = SSE16x16_MSA;
|
||||
VP8SSE16x8 = SSE16x8_MSA;
|
||||
VP8SSE8x8 = SSE8x8_MSA;
|
||||
VP8SSE4x4 = SSE4x4_MSA;
|
||||
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_MSA;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_MSA;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_MSA;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MSA
|
||||
|
@ -11,14 +11,14 @@
|
||||
//
|
||||
// adapted from libvpx (http://www.webmproject.org/code/)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./neon.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/neon.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
@ -37,15 +37,15 @@ static const int16_t kC2 = 17734; // half of kC2, actually. See comment above.
|
||||
#if defined(WEBP_USE_INTRINSICS)
|
||||
|
||||
// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
|
||||
static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
|
||||
static WEBP_INLINE int16x8_t ConvertU8ToS16_NEON(uint32x2_t v) {
|
||||
return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
|
||||
}
|
||||
|
||||
// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
|
||||
// to the corresponding rows of 'dst'.
|
||||
static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
|
||||
const int16x8_t dst01,
|
||||
const int16x8_t dst23) {
|
||||
static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
const int16x8_t dst01,
|
||||
const int16x8_t dst23) {
|
||||
// Unsigned saturate to 8b.
|
||||
const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
|
||||
const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
|
||||
@ -57,8 +57,10 @@ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
|
||||
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
|
||||
const uint8_t* const ref, uint8_t* const dst) {
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -70,19 +72,20 @@ static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
|
||||
|
||||
{
|
||||
// Convert to 16b.
|
||||
const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
|
||||
const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
|
||||
const int16x8_t dst01_s16 = ConvertU8ToS16_NEON(dst01);
|
||||
const int16x8_t dst23_s16 = ConvertU8ToS16_NEON(dst23);
|
||||
|
||||
// Descale with rounding.
|
||||
const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
|
||||
const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
|
||||
// Add the inverse transform.
|
||||
SaturateAndStore4x4(dst, out01, out23);
|
||||
SaturateAndStore4x4_NEON(dst, out01, out23);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
|
||||
int16x8x2_t* const out) {
|
||||
static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
|
||||
const int16x8_t in1,
|
||||
int16x8x2_t* const out) {
|
||||
// a0 a1 a2 a3 | b0 b1 b2 b3 => a0 b0 c0 d0 | a1 b1 c1 d1
|
||||
// c0 c1 c2 c3 | d0 d1 d2 d3 a2 b2 c2 d2 | a3 b3 c3 d3
|
||||
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
|
||||
@ -90,7 +93,7 @@ static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
|
||||
*out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
|
||||
static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
// {rows} = in0 | in4
|
||||
// in8 | in12
|
||||
// B1 = in4 | in12
|
||||
@ -113,22 +116,22 @@ static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
|
||||
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
|
||||
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
|
||||
const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
|
||||
Transpose8x2(E0, E1, rows);
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass(&rows);
|
||||
TransformPass(&rows);
|
||||
Add4x4(rows.val[0], rows.val[1], ref, dst);
|
||||
TransformPass_NEON(&rows);
|
||||
TransformPass_NEON(&rows);
|
||||
Add4x4_NEON(rows.val[0], rows.val[1], ref, dst);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -243,16 +246,16 @@ static void ITransformOne(const uint8_t* ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
// Load all 4x4 pixels into a single uint8x16_t variable.
|
||||
static uint8x16_t Load4x4(const uint8_t* src) {
|
||||
static uint8x16_t Load4x4_NEON(const uint8_t* src) {
|
||||
uint32x4_t out = vdupq_n_u32(0);
|
||||
out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0);
|
||||
out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1);
|
||||
@ -265,10 +268,12 @@ static uint8x16_t Load4x4(const uint8_t* src) {
|
||||
|
||||
#if defined(WEBP_USE_INTRINSICS)
|
||||
|
||||
static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
|
||||
const int16x4_t C, const int16x4_t D,
|
||||
int16x8_t* const out01,
|
||||
int16x8_t* const out32) {
|
||||
static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
|
||||
const int16x4_t B,
|
||||
const int16x4_t C,
|
||||
const int16x4_t D,
|
||||
int16x8_t* const out01,
|
||||
int16x8_t* const out32) {
|
||||
const int16x4x2_t AB = vtrn_s16(A, B);
|
||||
const int16x4x2_t CD = vtrn_s16(C, D);
|
||||
const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
|
||||
@ -283,24 +288,24 @@ static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
|
||||
vreinterpret_s64_s32(tmp02.val[1])));
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
|
||||
const uint8x8_t b) {
|
||||
static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
const uint8x8_t b) {
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4(src);
|
||||
const uint8x16_t R0 = Load4x4(ref);
|
||||
const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
|
||||
const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
const uint8x16_t R0 = Load4x4_NEON(ref);
|
||||
const int16x8_t D0D1 = DiffU8ToS16_NEON(vget_low_u8(S0), vget_low_u8(R0));
|
||||
const int16x8_t D2D3 = DiffU8ToS16_NEON(vget_high_u8(S0), vget_high_u8(R0));
|
||||
const int16x4_t D0 = vget_low_s16(D0D1);
|
||||
const int16x4_t D1 = vget_high_s16(D0D1);
|
||||
const int16x4_t D2 = vget_low_s16(D2D3);
|
||||
const int16x4_t D3 = vget_high_s16(D2D3);
|
||||
Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
|
||||
Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
|
||||
}
|
||||
{ // 1rst pass
|
||||
const int32x4_t kCst937 = vdupq_n_s32(937);
|
||||
@ -318,7 +323,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
|
||||
const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
|
||||
const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
|
||||
Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
|
||||
Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
|
||||
}
|
||||
{ // 2nd pass
|
||||
// the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0)
|
||||
@ -358,8 +363,8 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -478,7 +483,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT(const int16_t* src, int16_t* out) {
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -516,7 +521,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
|
||||
tmp0.val[3] = vsubq_s32(a0, a1);
|
||||
}
|
||||
{
|
||||
const int32x4x4_t tmp1 = Transpose4x4(tmp0);
|
||||
const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0);
|
||||
// a0 = tmp[0 + i] + tmp[ 8 + i]
|
||||
// a1 = tmp[4 + i] + tmp[12 + i]
|
||||
// a2 = tmp[4 + i] - tmp[12 + i]
|
||||
@ -560,7 +565,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
|
||||
// a 26ae, b 26ae
|
||||
// a 37bf, b 37bf
|
||||
//
|
||||
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
|
||||
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
|
||||
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
|
||||
const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
|
||||
@ -574,7 +579,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
|
||||
return q4_in;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
|
||||
const int16x8x4_t q4_in) {
|
||||
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
|
||||
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
|
||||
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
|
||||
@ -593,7 +599,7 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
|
||||
return q4_out;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
|
||||
static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
|
||||
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
|
||||
q4_in.val[2]));
|
||||
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
|
||||
@ -610,7 +616,7 @@ static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
|
||||
return q4_out;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
|
||||
static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
|
||||
const uint16x8_t q_w07 = vld1q_u16(&w[0]);
|
||||
const uint16x8_t q_w8f = vld1q_u16(&w[8]);
|
||||
int16x4x4_t d4_w;
|
||||
@ -622,8 +628,8 @@ static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
|
||||
return d4_w;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
|
||||
const int16x4x4_t d4_w) {
|
||||
static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
const int16x4x4_t d4_w) {
|
||||
int32x2_t d_sum;
|
||||
// sum += w[ 0] * abs(b0);
|
||||
// sum += w[ 4] * abs(b1);
|
||||
@ -652,8 +658,8 @@ static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -679,12 +685,12 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
// Vertical pass first to avoid a transpose (vertical and horizontal passes
|
||||
// are commutative because w/kWeightY is symmetric) and subsequent
|
||||
// transpose.
|
||||
const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
|
||||
const int16x4x4_t d4_w = DistoLoadW(w);
|
||||
const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in);
|
||||
const int16x4x4_t d4_w = DistoLoadW_NEON(w);
|
||||
// horizontal pass
|
||||
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
|
||||
const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
|
||||
int32x2_t d_sum = DistoSum(q4_h, d4_w);
|
||||
const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v);
|
||||
const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t);
|
||||
int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w);
|
||||
|
||||
// abs(sum2 - sum1) >> 5
|
||||
d_sum = vabs_s32(d_sum);
|
||||
@ -694,13 +700,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_NEON(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -708,15 +714,15 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
int16_t out[16];
|
||||
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
{
|
||||
int k;
|
||||
const int16x8_t a0 = vld1q_s16(out + 0);
|
||||
@ -740,9 +746,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -757,7 +763,7 @@ static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
|
||||
}
|
||||
|
||||
// Horizontal sum of all four uint32_t values in 'sum'.
|
||||
static int SumToInt(uint32x4_t sum) {
|
||||
static int SumToInt_NEON(uint32x4_t sum) {
|
||||
const uint64x2_t sum2 = vpaddlq_u32(sum);
|
||||
const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
|
||||
return (int)sum3;
|
||||
@ -767,18 +773,18 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
|
||||
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
|
||||
}
|
||||
return SumToInt(sum);
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
|
||||
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
|
||||
}
|
||||
return SumToInt(sum);
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
@ -791,12 +797,12 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);
|
||||
sum = vpadalq_u16(sum, prod);
|
||||
}
|
||||
return SumToInt(sum);
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
const uint8x16_t a0 = Load4x4(a);
|
||||
const uint8x16_t b0 = Load4x4(b);
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
|
||||
vget_low_u8(abs_diff));
|
||||
@ -805,7 +811,7 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
/* pair-wise adds and widen */
|
||||
const uint32x4_t sum1 = vpaddlq_u16(prod1);
|
||||
const uint32x4_t sum2 = vpaddlq_u16(prod2);
|
||||
return SumToInt(vaddq_u32(sum1, sum2));
|
||||
return SumToInt_NEON(vaddq_u32(sum1, sum2));
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -813,8 +819,8 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -847,10 +853,10 @@ static const uint8_t kShuffles[4][8] = {
|
||||
{ 14, 15, 22, 23, 28, 29, 30, 31 }
|
||||
};
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const int16x8_t out0 = Quantize(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize(in, mtx, 8);
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
@ -889,11 +895,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -905,14 +911,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8ITransform = ITransform_NEON;
|
||||
VP8FTransform = FTransform_NEON;
|
||||
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8FTransformWHT = FTransformWHT_NEON;
|
||||
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8TDisto4x4 = Disto4x4_NEON;
|
||||
VP8TDisto16x16 = Disto16x16_NEON;
|
||||
VP8CollectHistogram = CollectHistogram_NEON;
|
||||
|
||||
VP8SSE16x16 = SSE16x16_NEON;
|
||||
VP8SSE16x8 = SSE16x8_NEON;
|
||||
@ -920,8 +926,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8SSE4x4 = SSE4x4_NEON;
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_NEON;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -11,23 +11,23 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "./common_sse2.h"
|
||||
#include "../enc/cost_enc.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/enc/cost_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -193,10 +193,10 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransformPass1(const __m128i* const in01,
|
||||
const __m128i* const in23,
|
||||
__m128i* const out01,
|
||||
__m128i* const out32) {
|
||||
static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
const __m128i* const in23,
|
||||
__m128i* const out01,
|
||||
__m128i* const out32) {
|
||||
const __m128i k937 = _mm_set1_epi32(937);
|
||||
const __m128i k1812 = _mm_set1_epi32(1812);
|
||||
|
||||
@ -239,8 +239,9 @@ static void FTransformPass1(const __m128i* const in01,
|
||||
*out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
|
||||
}
|
||||
|
||||
static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -291,7 +292,8 @@ static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -328,13 +330,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
__m128i v01, v32;
|
||||
|
||||
// First pass
|
||||
FTransformPass1(&row01, &row23, &v01, &v32);
|
||||
FTransformPass1_SSE2(&row01, &row23, &v01, &v32);
|
||||
|
||||
// Second pass
|
||||
FTransformPass2(&v01, &v32, out);
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -374,15 +377,15 @@ static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
__m128i v01h, v32h;
|
||||
|
||||
// First pass
|
||||
FTransformPass1(&shuf01l, &shuf23l, &v01l, &v32l);
|
||||
FTransformPass1(&shuf01h, &shuf23h, &v01h, &v32h);
|
||||
FTransformPass1_SSE2(&shuf01l, &shuf23l, &v01l, &v32l);
|
||||
FTransformPass1_SSE2(&shuf01h, &shuf23h, &v01h, &v32h);
|
||||
|
||||
// Second pass
|
||||
FTransformPass2(&v01l, &v32l, out + 0);
|
||||
FTransformPass2(&v01h, &v32h, out + 16);
|
||||
FTransformPass2_SSE2(&v01l, &v32l, out + 0);
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -398,14 +401,14 @@ static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
FTransformWHTRow(in + 0 * 64, &row0);
|
||||
FTransformWHTRow(in + 1 * 64, &row1);
|
||||
FTransformWHTRow(in + 2 * 64, &row2);
|
||||
FTransformWHTRow(in + 3 * 64, &row3);
|
||||
FTransformWHTRow_SSE2(in + 0 * 64, &row0);
|
||||
FTransformWHTRow_SSE2(in + 1 * 64, &row1);
|
||||
FTransformWHTRow_SSE2(in + 2 * 64, &row2);
|
||||
FTransformWHTRow_SSE2(in + 3 * 64, &row3);
|
||||
|
||||
{
|
||||
// The a* are 15b signed.
|
||||
@ -431,9 +434,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -442,7 +445,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int16_t out[16];
|
||||
int k;
|
||||
|
||||
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
FTransform_SSE2(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
|
||||
|
||||
// Convert coefficients to bin (within out[]).
|
||||
{
|
||||
@ -476,7 +479,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
// Intra predictions
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -484,7 +487,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -492,20 +495,20 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
||||
static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
if (size == 4) {
|
||||
int j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
memset(dst + j * BPS, value, 4);
|
||||
}
|
||||
} else if (size == 8) {
|
||||
Put8x8uv(value, dst);
|
||||
Put8x8uv_SSE2(value, dst);
|
||||
} else {
|
||||
Put16(value, dst);
|
||||
Put16_SSE2(value, dst);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -513,7 +516,7 @@ static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -521,20 +524,20 @@ static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv(dst, top);
|
||||
VE8uv_SSE2(dst, top);
|
||||
} else {
|
||||
VE16(dst, top);
|
||||
VE16_SSE2(dst, top);
|
||||
}
|
||||
} else {
|
||||
Fill(dst, 127, size);
|
||||
Fill_SSE2(dst, 127, size);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
@ -543,7 +546,7 @@ static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
@ -552,21 +555,21 @@ static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv(dst, left);
|
||||
HE8uv_SSE2(dst, left);
|
||||
} else {
|
||||
HE16(dst, left);
|
||||
HE16_SSE2(dst, left);
|
||||
}
|
||||
} else {
|
||||
Fill(dst, 129, size);
|
||||
Fill_SSE2(dst, 129, size);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -593,13 +596,13 @@ static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM(dst, left, top, size);
|
||||
TM_SSE2(dst, left, top, size);
|
||||
} else {
|
||||
HorizontalPred(dst, left, size);
|
||||
HorizontalPred_SSE2(dst, left, size);
|
||||
}
|
||||
} else {
|
||||
// true motion without left samples (hence: with default 129 value)
|
||||
@ -607,90 +610,90 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
// Note that if top samples are not available, the default value is
|
||||
// then 129, and not 127 as in the VerticalPred case.
|
||||
if (top != NULL) {
|
||||
VerticalPred(dst, top, size);
|
||||
VerticalPred_SSE2(dst, top, size);
|
||||
} else {
|
||||
Fill(dst, 129, size);
|
||||
Fill_SSE2(dst, 129, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
const int DC = VP8HorizontalAdd8b(&combined) + 8;
|
||||
Put8x8uv(DC >> 4, dst);
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
const int DC = _mm_cvtsi128_si32(sum) + 4;
|
||||
Put8x8uv(DC >> 3, dst);
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft(dst, left);
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
|
||||
Put8x8uv(0x80, dst);
|
||||
static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv(dst, left, top);
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
} else { // top, but no left
|
||||
DC8uvNoLeft(dst, top);
|
||||
DC8uvNoLeft_SSE2(dst, top);
|
||||
}
|
||||
} else if (left != NULL) { // left but no top
|
||||
DC8uvNoTop(dst, left);
|
||||
DC8uvNoTop_SSE2(dst, left);
|
||||
} else { // no top, no left, nothing.
|
||||
DC8uvNoTopLeft(dst);
|
||||
DC8uvNoTopLeft_SSE2(dst);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
VP8HorizontalAdd8b(&top_row) + VP8HorizontalAdd8b(&left_row) + 16;
|
||||
Put16(DC >> 5, dst);
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16(DC >> 4, dst);
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft(dst, left);
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
|
||||
Put16(0x80, dst);
|
||||
static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16(dst, left, top);
|
||||
DC16_SSE2(dst, left, top);
|
||||
} else { // top, but no left
|
||||
DC16NoLeft(dst, top);
|
||||
DC16NoLeft_SSE2(dst, top);
|
||||
}
|
||||
} else if (left != NULL) { // left but no top
|
||||
DC16NoTop(dst, left);
|
||||
DC16NoTop_SSE2(dst, left);
|
||||
} else { // no top, no left, nothing.
|
||||
DC16NoTopLeft(dst);
|
||||
DC16NoTopLeft_SSE2(dst);
|
||||
}
|
||||
}
|
||||
|
||||
@ -709,7 +712,8 @@ static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -725,7 +729,8 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -737,14 +742,15 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill(dst, dc >> 3, 4);
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -760,8 +766,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -786,8 +792,8 @@ static WEBP_INLINE void VR4(uint8_t* dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -812,7 +818,8 @@ static WEBP_INLINE void VL4(uint8_t* dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -828,7 +835,7 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -843,7 +850,7 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -866,7 +873,7 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -888,55 +895,56 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
HE4(I4HE4 + dst, top);
|
||||
RD4(I4RD4 + dst, top);
|
||||
VR4(I4VR4 + dst, top);
|
||||
LD4(I4LD4 + dst, top);
|
||||
VL4(I4VL4 + dst, top);
|
||||
HD4(I4HD4 + dst, top);
|
||||
HU4(I4HU4 + dst, top);
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
HE4_SSE2(I4HE4 + dst, top);
|
||||
RD4_SSE2(I4RD4 + dst, top);
|
||||
VR4_SSE2(I4VR4 + dst, top);
|
||||
LD4_SSE2(I4LD4 + dst, top);
|
||||
VL4_SSE2(I4VL4 + dst, top);
|
||||
HD4_SSE2(I4HD4 + dst, top);
|
||||
HU4_SSE2(I4HU4 + dst, top);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DC8uvMode(C8DC8 + dst, left, top);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
HorizontalPred(C8HE8 + dst, left, 8);
|
||||
TrueMotion(C8TM8 + dst, left, top, 8);
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
|
||||
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
|
||||
// V block
|
||||
dst += 8;
|
||||
if (top != NULL) top += 8;
|
||||
if (left != NULL) left += 16;
|
||||
DC8uvMode(C8DC8 + dst, left, top);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
HorizontalPred(C8HE8 + dst, left, 8);
|
||||
TrueMotion(C8TM8 + dst, left, top, 8);
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
|
||||
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DC16Mode(I16DC16 + dst, left, top);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
TrueMotion(I16TM16 + dst, left, top, 16);
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
TrueMotion_SSE2(I16TM16 + dst, left, top, 16);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
|
||||
__m128i* const sum) {
|
||||
static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
const __m128i b,
|
||||
__m128i* const sum) {
|
||||
// take abs(a-b) in 8b
|
||||
const __m128i a_b = _mm_subs_epu8(a, b);
|
||||
const __m128i b_a = _mm_subs_epu8(b, a);
|
||||
@ -951,8 +959,8 @@ static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
|
||||
int num_pairs) {
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
int i;
|
||||
@ -963,8 +971,8 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
|
||||
__m128i sum1, sum2;
|
||||
SubtractAndAccumulate(a0, b0, &sum1);
|
||||
SubtractAndAccumulate(a1, b1, &sum2);
|
||||
SubtractAndAccumulate_SSE2(a0, b0, &sum1);
|
||||
SubtractAndAccumulate_SSE2(a1, b1, &sum2);
|
||||
sum = _mm_add_epi32(sum, _mm_add_epi32(sum1, sum2));
|
||||
a += 2 * BPS;
|
||||
b += 2 * BPS;
|
||||
@ -973,18 +981,18 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN(a, b, 8);
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN(a, b, 4);
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1011,7 +1019,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1048,7 +1056,7 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1086,8 +1094,8 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1187,19 +1195,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform(a, b, w);
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_SSE2(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -1209,9 +1217,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1321,22 +1329,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
|
||||
}
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, NULL, mtx);
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -1346,24 +1354,24 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
extern void VP8EncDspInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8EncPredLuma16 = Intra16Preds;
|
||||
VP8EncPredChroma8 = IntraChromaPreds;
|
||||
VP8EncPredLuma4 = Intra4Preds;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||
VP8ITransform = ITransform;
|
||||
VP8FTransform = FTransform;
|
||||
VP8FTransform2 = FTransform2;
|
||||
VP8FTransformWHT = FTransformWHT;
|
||||
VP8SSE16x16 = SSE16x16;
|
||||
VP8SSE16x8 = SSE16x8;
|
||||
VP8SSE8x8 = SSE8x8;
|
||||
VP8SSE4x4 = SSE4x4;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8Mean16x4 = Mean16x4;
|
||||
VP8CollectHistogram = CollectHistogram_SSE2;
|
||||
VP8EncPredLuma16 = Intra16Preds_SSE2;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_SSE2;
|
||||
VP8EncPredLuma4 = Intra4Preds_SSE2;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_SSE2;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_SSE2;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE2;
|
||||
VP8ITransform = ITransform_SSE2;
|
||||
VP8FTransform = FTransform_SSE2;
|
||||
VP8FTransform2 = FTransform2_SSE2;
|
||||
VP8FTransformWHT = FTransformWHT_SSE2;
|
||||
VP8SSE16x16 = SSE16x16_SSE2;
|
||||
VP8SSE16x8 = SSE16x8_SSE2;
|
||||
VP8SSE8x8 = SSE8x8_SSE2;
|
||||
VP8SSE4x4 = SSE4x4_SSE2;
|
||||
VP8TDisto4x4 = Disto4x4_SSE2;
|
||||
VP8TDisto16x16 = Disto16x16_SSE2;
|
||||
VP8Mean16x4 = Mean16x4_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -11,21 +11,21 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
#include <stdlib.h> // for abs()
|
||||
|
||||
#include "./common_sse2.h"
|
||||
#include "../enc/vp8i_enc.h"
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms.
|
||||
|
||||
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -70,8 +70,8 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
|
||||
@ -168,19 +168,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform(a, b, w);
|
||||
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE41(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
for (x = 0; x < 16; x += 4) {
|
||||
D += Disto4x4(a + x + y, b + x + y, w);
|
||||
D += Disto4x4_SSE41(a + x + y, b + x + y, w);
|
||||
}
|
||||
}
|
||||
return D;
|
||||
@ -197,9 +197,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||
2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
|
||||
2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i out0, out8;
|
||||
@ -300,22 +300,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
|
||||
#undef PSHUFB_CST
|
||||
|
||||
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
|
||||
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock(in, out, NULL, mtx);
|
||||
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
|
||||
return nz;
|
||||
}
|
||||
|
||||
@ -324,12 +324,12 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
|
||||
extern void VP8EncDspInitSSE41(void);
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
|
||||
VP8CollectHistogram = CollectHistogram;
|
||||
VP8EncQuantizeBlock = QuantizeBlock;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||
VP8TDisto4x4 = Disto4x4;
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
VP8CollectHistogram = CollectHistogram_SSE41;
|
||||
VP8EncQuantizeBlock = QuantizeBlock_SSE41;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_SSE41;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE41;
|
||||
VP8TDisto4x4 = Disto4x4_SSE41;
|
||||
VP8TDisto16x16 = Disto16x16_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Urvang (urvang@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -20,16 +20,17 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
int i;
|
||||
if (inverse) {
|
||||
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
|
||||
@ -41,7 +42,44 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
@ -53,48 +91,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -105,26 +106,28 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLine(in, preds, out, width, inverse);
|
||||
PredictLine_C(in, preds, out, width, inverse);
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -136,7 +139,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -147,11 +150,11 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
int w;
|
||||
// leftmost pixel: predict from above.
|
||||
PredictLine(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
for (w = 1; w < width; ++w) {
|
||||
const int pred = GradientPredictor(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
const int pred = GradientPredictor_C(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
out[w] = in[w] + (inverse ? pred : -pred);
|
||||
}
|
||||
++row;
|
||||
@ -160,32 +163,34 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef SANITY_CHECK
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
}
|
||||
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t pred = (prev == NULL) ? 0 : prev[0];
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
@ -194,26 +199,28 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_C(NULL, in, out, width);
|
||||
} else {
|
||||
uint8_t top = prev[0], top_left = top, left = top;
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
top = prev[i]; // need to read this first, in case prev==out
|
||||
left = in[i] + GradientPredictor(left, top, top_left);
|
||||
left = in[i] + GradientPredictor_C(left, top, top_left);
|
||||
top_left = top;
|
||||
out[i] = left;
|
||||
}
|
||||
@ -238,14 +245,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
|
||||
|
||||
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
|
||||
#endif
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
|
||||
|
||||
WebPFilters[WEBP_FILTER_NONE] = NULL;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
@ -253,11 +264,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
VP8FiltersInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8FiltersInitMIPSdspR2();
|
||||
@ -269,5 +275,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
|
||||
filters_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
|
@ -12,11 +12,11 @@
|
||||
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
|
||||
// Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
|
||||
#include "../dsp/dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -101,8 +101,8 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
|
||||
int length) {
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
|
||||
@ -192,10 +192,11 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -207,7 +208,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -219,9 +220,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -237,9 +240,11 @@ static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -252,7 +257,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -266,15 +271,16 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
int temp0;
|
||||
__asm__ volatile (
|
||||
"addu %[temp0], %[a], %[b] \n\t"
|
||||
@ -293,9 +299,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
int w; \
|
||||
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
|
||||
for (w = 1; w < width; ++w) { \
|
||||
const int pred = GradientPredictor(PREDS[w - 1], \
|
||||
PREDS[w - stride], \
|
||||
PREDS[w - stride - 1]); \
|
||||
const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1], \
|
||||
PREDS[w - stride], \
|
||||
PREDS[w - stride - 1]); \
|
||||
out[w] = in[w] OPERATION pred; \
|
||||
} \
|
||||
++row; \
|
||||
@ -304,9 +310,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
@ -318,7 +324,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine(in + 1, out + 1, width - 1);
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
@ -330,38 +336,39 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
|
||||
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
|
||||
}
|
||||
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
|
||||
} else {
|
||||
DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
|
||||
} else {
|
||||
uint8_t top = prev[0], top_left = top, left = top;
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
top = prev[i]; // need to read this first, in case prev==dst
|
||||
left = in[i] + GradientPredictor(left, top, top_left);
|
||||
left = in[i] + GradientPredictor_MIPSdspR2(left, top, top_left);
|
||||
top_left = top;
|
||||
out[i] = left;
|
||||
}
|
||||
@ -379,13 +386,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
extern void VP8FiltersInitMIPSdspR2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_MIPSdspR2;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_MIPSdspR2;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_MIPSdspR2;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MIPSdspR2;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MIPSdspR2;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MIPSdspR2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS_DSP_R2
|
||||
|
@ -11,11 +11,11 @@
|
||||
//
|
||||
// Author: Prashant Patil (prashant.patil@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include "./msa_macro.h"
|
||||
#include "src/dsp/msa_macro.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
@ -66,8 +66,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horrizontal filter
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -129,8 +129,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -157,8 +157,8 @@ static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -190,9 +190,9 @@ static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
extern void VP8FiltersInitMSA(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MSA;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MSA;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MSA;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MSA
|
||||
|
@ -11,12 +11,12 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <assert.h>
|
||||
#include "./neon.h"
|
||||
#include "src/dsp/neon.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Helpful macros.
|
||||
@ -134,7 +134,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
@ -196,7 +196,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
|
||||
// GradientUnfilter_NEON is correct but slower than the C-version,
|
||||
// at least on ARM64. For armv7, it's a wash.
|
||||
// So best is to disable it for now, but keep the idea around...
|
||||
// #define USE_GRADIENT_UNFILTER
|
||||
#if !defined(USE_GRADIENT_UNFILTER)
|
||||
#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE
|
||||
#endif
|
||||
|
||||
#if defined(USE_GRADIENT_UNFILTER)
|
||||
#if (USE_GRADIENT_UNFILTER == 1)
|
||||
#define GRAD_PROCESS_LANE(L) do { \
|
||||
const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
|
||||
const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \
|
||||
@ -292,7 +294,7 @@ static void GradientPredictInverse_NEON(const uint8_t* const in,
|
||||
#undef GRAD_PROCESS_LANE
|
||||
|
||||
static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter_NEON(NULL, in, out, width);
|
||||
} else {
|
||||
@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void);
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
|
||||
#if defined(USE_GRADIENT_UNFILTER)
|
||||
#if (USE_GRADIENT_UNFILTER == 1)
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
|
||||
#endif
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
@ -24,16 +24,16 @@
|
||||
// Helpful macro.
|
||||
|
||||
# define SANITY_CHECK(in, out) \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; // Silence unused warning.
|
||||
|
||||
static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -51,7 +51,7 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -71,10 +71,11 @@ static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -84,7 +85,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -94,7 +95,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
while (row < last_row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -104,9 +105,10 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -117,7 +119,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -125,7 +127,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLineTop(in, in - stride, out, width);
|
||||
PredictLineTop_SSE2(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -135,14 +137,14 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Gradient filter.
|
||||
|
||||
static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
|
||||
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
const int g = a + b - c;
|
||||
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
|
||||
}
|
||||
|
||||
static void GradientPredictDirect(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -161,14 +163,14 @@ static void GradientPredictDirect(const uint8_t* const row,
|
||||
_mm_storel_epi64((__m128i*)(out + i), H);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
SANITY_CHECK(in, out);
|
||||
@ -178,7 +180,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft(in + 1, out + 1, width - 1);
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -187,7 +189,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
out[0] = in[0] - in[-stride];
|
||||
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -198,26 +200,27 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Inverse transforms
|
||||
|
||||
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
int i;
|
||||
__m128i last;
|
||||
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
|
||||
@ -238,10 +241,10 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
|
||||
}
|
||||
|
||||
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
} else {
|
||||
int i;
|
||||
const int max_pos = width & ~31;
|
||||
@ -260,9 +263,9 @@ static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientPredictInverse(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
static void GradientPredictInverse_SSE2(const uint8_t* const in,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const row, int length) {
|
||||
if (length > 0) {
|
||||
int i;
|
||||
const int max_pos = length & ~7;
|
||||
@ -293,18 +296,18 @@ static void GradientPredictInverse(const uint8_t* const in,
|
||||
_mm_storel_epi64((__m128i*)&row[i], out);
|
||||
}
|
||||
for (; i < length; ++i) {
|
||||
row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
|
||||
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
if (prev == NULL) {
|
||||
HorizontalUnfilter(NULL, in, out, width);
|
||||
HorizontalUnfilter_SSE2(NULL, in, out, width);
|
||||
} else {
|
||||
out[0] = in[0] + prev[0]; // predict from above
|
||||
GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
|
||||
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -314,13 +317,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
|
||||
extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
|
||||
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -13,14 +13,15 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "../dec/vp8li_dec.h"
|
||||
#include "../utils/endian_inl_utils.h"
|
||||
#include "./lossless.h"
|
||||
#include "./lossless_common.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
|
||||
#define MAX_DIFF_COST (1e30f)
|
||||
|
||||
@ -80,8 +81,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
||||
return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
|
||||
// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
|
||||
#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
|
||||
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
|
||||
// inlined.
|
||||
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
|
||||
# define LOCAL_INLINE __attribute__ ((noinline))
|
||||
#else
|
||||
# define LOCAL_INLINE WEBP_INLINE
|
||||
@ -107,69 +109,69 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)top;
|
||||
return left;
|
||||
}
|
||||
static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[0];
|
||||
}
|
||||
static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[1];
|
||||
}
|
||||
static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[-1];
|
||||
}
|
||||
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
|
||||
static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
@ -177,29 +179,29 @@ static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
|
||||
}
|
||||
(void)upper;
|
||||
}
|
||||
GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
|
||||
GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
|
||||
GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
|
||||
GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
|
||||
GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
|
||||
GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
|
||||
GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
|
||||
GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
|
||||
GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
|
||||
GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
|
||||
GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
|
||||
GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
|
||||
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
|
||||
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Inverse prediction.
|
||||
static void PredictorInverseTransform(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* in, uint32_t* out) {
|
||||
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* in, uint32_t* out) {
|
||||
const int width = transform->xsize_;
|
||||
if (y_start == 0) { // First Row follows the L (mode=1) mode.
|
||||
PredictorAdd0(in, NULL, 1, out);
|
||||
PredictorAdd1(in + 1, NULL, width - 1, out + 1);
|
||||
PredictorAdd0_C(in, NULL, 1, out);
|
||||
PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
|
||||
in += width;
|
||||
out += width;
|
||||
++y_start;
|
||||
@ -217,7 +219,7 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
|
||||
const uint32_t* pred_mode_src = pred_mode_base;
|
||||
int x = 1;
|
||||
// First pixel follows the T (mode=2) mode.
|
||||
PredictorAdd2(in, out - width, 1, out);
|
||||
PredictorAdd2_C(in, out - width, 1, out);
|
||||
// .. the rest:
|
||||
while (x < width) {
|
||||
const VP8LPredictorAddSubFunc pred_func =
|
||||
@ -272,8 +274,8 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t argb = src[i];
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
int new_red = red;
|
||||
int new_blue = argb;
|
||||
int new_red = red & 0xff;
|
||||
int new_blue = argb & 0xff;
|
||||
new_red += ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
@ -284,9 +286,9 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
}
|
||||
|
||||
// Color space inverse transform.
|
||||
static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* src, uint32_t* dst) {
|
||||
static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
|
||||
int y_start, int y_end,
|
||||
const uint32_t* src, uint32_t* dst) {
|
||||
const int width = transform->xsize_;
|
||||
const int tile_width = 1 << transform->bits_;
|
||||
const int mask = tile_width - 1;
|
||||
@ -362,10 +364,10 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \
|
||||
} \
|
||||
}
|
||||
|
||||
COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
|
||||
VP8GetARGBIndex, VP8GetARGBValue)
|
||||
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
|
||||
8b, VP8GetAlphaIndex, VP8GetAlphaValue)
|
||||
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
|
||||
uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
|
||||
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
|
||||
uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
|
||||
|
||||
#undef COLOR_INDEX_INVERSE
|
||||
|
||||
@ -380,7 +382,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
|
||||
break;
|
||||
case PREDICTOR_TRANSFORM:
|
||||
PredictorInverseTransform(transform, row_start, row_end, in, out);
|
||||
PredictorInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
if (row_end != transform->ysize_) {
|
||||
// The last predicted row in this iteration will be the top-pred row
|
||||
// for the first row in next iteration.
|
||||
@ -389,7 +391,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
}
|
||||
break;
|
||||
case CROSS_COLOR_TRANSFORM:
|
||||
ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
|
||||
ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
break;
|
||||
case COLOR_INDEXING_TRANSFORM:
|
||||
if (in == out && transform->bits_ > 0) {
|
||||
@ -403,9 +405,9 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
VP8LSubSampleSize(transform->xsize_, transform->bits_);
|
||||
uint32_t* const src = out + out_stride - in_stride;
|
||||
memmove(src, out, in_stride * sizeof(*src));
|
||||
ColorIndexInverseTransform(transform, row_start, row_end, src, out);
|
||||
ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
|
||||
} else {
|
||||
ColorIndexInverseTransform(transform, row_start, row_end, in, out);
|
||||
ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -452,7 +454,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
const uint32_t argb = *src++;
|
||||
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
|
||||
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
*dst++ = ba;
|
||||
*dst++ = rg;
|
||||
#else
|
||||
@ -469,7 +471,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
const uint32_t argb = *src++;
|
||||
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
|
||||
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
|
||||
#ifdef WEBP_SWAP_16BIT_CSP
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
*dst++ = gb;
|
||||
*dst++ = rg;
|
||||
#else
|
||||
@ -496,22 +498,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN)
|
||||
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
|
||||
WebPUint32ToMem(dst, BSwap32(argb));
|
||||
#else // WEBP_REFERENCE_IMPLEMENTATION
|
||||
dst[0] = (argb >> 24) & 0xff;
|
||||
dst[1] = (argb >> 16) & 0xff;
|
||||
dst[2] = (argb >> 8) & 0xff;
|
||||
dst[3] = (argb >> 0) & 0xff;
|
||||
#endif
|
||||
#else // WORDS_BIGENDIAN
|
||||
dst[0] = (argb >> 0) & 0xff;
|
||||
dst[1] = (argb >> 8) & 0xff;
|
||||
dst[2] = (argb >> 16) & 0xff;
|
||||
dst[3] = (argb >> 24) & 0xff;
|
||||
#endif
|
||||
dst += sizeof(argb);
|
||||
}
|
||||
} else {
|
||||
@ -593,23 +580,23 @@ extern void VP8LDspInitMSA(void);
|
||||
static volatile VP8CPUInfo lossless_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&lossless_last_cpuinfo_used;
|
||||
|
||||
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
|
||||
(OUT)[0] = IN##0; \
|
||||
(OUT)[1] = IN##1; \
|
||||
(OUT)[2] = IN##2; \
|
||||
(OUT)[3] = IN##3; \
|
||||
(OUT)[4] = IN##4; \
|
||||
(OUT)[5] = IN##5; \
|
||||
(OUT)[6] = IN##6; \
|
||||
(OUT)[7] = IN##7; \
|
||||
(OUT)[8] = IN##8; \
|
||||
(OUT)[9] = IN##9; \
|
||||
(OUT)[10] = IN##10; \
|
||||
(OUT)[11] = IN##11; \
|
||||
(OUT)[12] = IN##12; \
|
||||
(OUT)[13] = IN##13; \
|
||||
(OUT)[14] = IN##0; /* <- padding security sentinels*/ \
|
||||
(OUT)[15] = IN##0; \
|
||||
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
|
||||
(OUT)[0] = IN##0_C; \
|
||||
(OUT)[1] = IN##1_C; \
|
||||
(OUT)[2] = IN##2_C; \
|
||||
(OUT)[3] = IN##3_C; \
|
||||
(OUT)[4] = IN##4_C; \
|
||||
(OUT)[5] = IN##5_C; \
|
||||
(OUT)[6] = IN##6_C; \
|
||||
(OUT)[7] = IN##7_C; \
|
||||
(OUT)[8] = IN##8_C; \
|
||||
(OUT)[9] = IN##9_C; \
|
||||
(OUT)[10] = IN##10_C; \
|
||||
(OUT)[11] = IN##11_C; \
|
||||
(OUT)[12] = IN##12_C; \
|
||||
(OUT)[13] = IN##13_C; \
|
||||
(OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
|
||||
(OUT)[15] = IN##0_C; \
|
||||
} while (0);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
@ -620,18 +607,21 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
|
||||
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
|
||||
|
||||
VP8LTransformColorInverse = VP8LTransformColorInverse_C;
|
||||
|
||||
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
||||
VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
|
||||
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||
#endif
|
||||
|
||||
VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
|
||||
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
|
||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||
|
||||
VP8LMapColor32b = MapARGB;
|
||||
VP8LMapColor8b = MapAlpha;
|
||||
VP8LMapColor32b = MapARGB_C;
|
||||
VP8LMapColor8b = MapAlpha_C;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
@ -640,11 +630,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
VP8LDspInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8LDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||
VP8LDspInitMIPSdspR2();
|
||||
@ -656,6 +641,24 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8LDspInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(VP8LAddGreenToBlueAndRed != NULL);
|
||||
assert(VP8LTransformColorInverse != NULL);
|
||||
assert(VP8LConvertBGRAToRGBA != NULL);
|
||||
assert(VP8LConvertBGRAToRGB != NULL);
|
||||
assert(VP8LConvertBGRAToBGR != NULL);
|
||||
assert(VP8LConvertBGRAToRGBA4444 != NULL);
|
||||
assert(VP8LConvertBGRAToRGB565 != NULL);
|
||||
assert(VP8LMapColor32b != NULL);
|
||||
assert(VP8LMapColor8b != NULL);
|
||||
|
||||
lossless_last_cpuinfo_used = VP8GetCPUInfo;
|
||||
}
|
||||
#undef COPY_PREDICTOR_ARRAY
|
||||
|
@ -15,18 +15,18 @@
|
||||
#ifndef WEBP_DSP_LOSSLESS_H_
|
||||
#define WEBP_DSP_LOSSLESS_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
#include "../webp/decode.h"
|
||||
#include "src/webp/types.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#include "../enc/histogram_enc.h"
|
||||
#include "../utils/utils.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef WEBP_EXPERIMENTAL_FEATURES
|
||||
#include "../enc/delta_palettization_enc.h"
|
||||
#include "src/enc/delta_palettization_enc.h"
|
||||
#endif // WEBP_EXPERIMENTAL_FEATURES
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -124,7 +124,7 @@ void VP8LDspInit(void);
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* const dst, int num_pixels);
|
||||
uint32_t* dst, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
|
@ -16,9 +16,9 @@
|
||||
#ifndef WEBP_DSP_LOSSLESS_COMMON_H_
|
||||
#define WEBP_DSP_LOSSLESS_COMMON_H_
|
||||
|
||||
#include "../webp/types.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#include "../utils/utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user