mirror of
https://github.com/webmproject/libwebp.git
synced 2025-04-11 19:36:47 +02:00
Compare commits
165 Commits
v1.4.0-rc1
...
main
Author | SHA1 | Date | |
---|---|---|---|
|
5225592f6b | ||
|
00338240c1 | ||
|
44f91b0ddd | ||
|
ee8e8c620f | ||
|
a1ad3f1e37 | ||
|
321561b41f | ||
|
e0ae21d231 | ||
|
a4183d94c7 | ||
|
f2b3f52733 | ||
|
7c70ff7a3b | ||
|
9dd5ae819b | ||
|
613be8fc61 | ||
|
1d86819f49 | ||
|
743a5f092d | ||
|
565da14882 | ||
|
319860e919 | ||
|
815fc1e110 | ||
|
980b708e2c | ||
|
73b728cbb9 | ||
|
6a22b6709c | ||
|
7ed2b10ef0 | ||
|
654bfb040c | ||
|
f8f2410710 | ||
|
2af6c034ac | ||
|
a4d7a71533 | ||
|
c3d85ce4cf | ||
|
ad14e811cf | ||
|
74cd026edb | ||
|
a027aa93de | ||
|
25e17c686f | ||
|
aa2684fccc | ||
|
369238461b | ||
|
ceea8ff6b3 | ||
|
e4f7a9f0c7 | ||
|
1b4c967fbb | ||
|
9e5ecfaf00 | ||
|
da0d9c7d4e | ||
|
fcff86c71b | ||
|
b76c4a8416 | ||
|
306335198d | ||
|
4c85d860ea | ||
|
0ab789e067 | ||
|
0323645066 | ||
|
61e2cfdadd | ||
|
7bda3deb89 | ||
|
2ddaaf0aa5 | ||
|
a3ba6f19e9 | ||
|
f999d94f4a | ||
|
dfdcb7f95c | ||
|
78ed683978 | ||
|
d516a68e54 | ||
|
874069042e | ||
|
fdb229ea3a | ||
|
0c3cd9cc2c | ||
|
169dfbf931 | ||
|
2dd5eb9862 | ||
|
23bbafbeb8 | ||
|
35915b389e | ||
|
a32b436bd5 | ||
|
04d4b4f387 | ||
|
b1cb37e659 | ||
|
201894ef24 | ||
|
02eac8a741 | ||
|
84b118c9c3 | ||
|
052cf42f1a | ||
|
220ee52967 | ||
|
7861947813 | ||
|
14f09ab75b | ||
|
a78c5356ba | ||
|
bc49176355 | ||
|
34f9223829 | ||
|
367ca938f1 | ||
|
a582b53b74 | ||
|
0fd25d8406 | ||
|
f888291359 | ||
|
40e4ca60ea | ||
|
57883c78ed | ||
|
1c8eba978b | ||
|
2e81017c7a | ||
|
94de6c7fed | ||
|
51d9832a36 | ||
|
7bcb36b884 | ||
|
8e0cc14c3e | ||
|
cea684626d | ||
|
615e58744f | ||
|
233e86b91f | ||
|
1a29fd2fc3 | ||
|
dd9d3770d7 | ||
|
ab451a495c | ||
|
f9a480f7c3 | ||
|
04834acae7 | ||
|
39a602afe6 | ||
|
f28c837dc1 | ||
|
74be8e22d9 | ||
|
0c01db7c3c | ||
|
f2d6dc1eef | ||
|
caa19e5b3a | ||
|
c9dd9bd40b | ||
|
8a7c8dc662 | ||
|
f0c53cd966 | ||
|
eef903d04a | ||
|
6296cc8d0d | ||
|
fbd93896a6 | ||
|
cc7ff5459a | ||
|
4e2828bae8 | ||
|
d742b24a88 | ||
|
c7bb4cb585 | ||
|
952a989b1b | ||
|
dde11574b0 | ||
|
a1ca153d51 | ||
|
3bd9420289 | ||
|
d27d246e42 | ||
|
4838611f91 | ||
|
314a142a34 | ||
|
3bfb05e38c | ||
|
baa93808d9 | ||
|
41a5e582c2 | ||
|
fb444b692b | ||
|
c1c89f5189 | ||
|
66408c2c7c | ||
|
ac1e410ded | ||
|
b78d39571f | ||
|
cff21a7d87 | ||
|
6853a8e5ac | ||
|
9bc09db4b8 | ||
|
0a9f1c19f8 | ||
|
db0cb9c27e | ||
|
ff2b5b15ae | ||
|
c4af79d053 | ||
|
0ec80aef3d | ||
|
96d79f8481 | ||
|
c35c7e0240 | ||
|
f2fe8decce | ||
|
9ce982fdf2 | ||
|
3ba8af1a33 | ||
|
ea0e121b6a | ||
|
27731afd47 | ||
|
ddd6245eb7 | ||
|
50074930e3 | ||
|
20e92f7d40 | ||
|
4f200de591 | ||
|
64186bb36c | ||
|
0905f61c85 | ||
|
e86787586b | ||
|
5e5b8f0c95 | ||
|
45129ee027 | ||
|
ee26766a89 | ||
|
7ec51c5916 | ||
|
3cd16fd3e2 | ||
|
971a03d820 | ||
|
1bf198a22b | ||
|
1e462ca80e | ||
|
64d1ec23ac | ||
|
a90160e11a | ||
|
a7aa7525b8 | ||
|
68ff4e1efe | ||
|
79e7968ad0 | ||
|
d33455cd31 | ||
|
a67ff735a2 | ||
|
edc289092a | ||
|
3cada4cef4 | ||
|
dc9505855e | ||
|
845d5476a8 | ||
|
8a6a55bba8 | ||
|
cf7c5a5de8 |
.gitignoreAUTHORSCMakeLists.txtChangeLogMakefile.vcNEWSREADME.md
cmake
configure.acdoc
examples
Makefile.amanim_diff.canim_dump.canim_util.ccwebp.cdwebp.cgif2webp.cimg2webp.cvwebp.cwebpinfo.cwebpmux.c
extras
imageio
iosbuild.shman
sharpyuv
src
Makefile.am
dec
demux
dsp
Makefile.amalpha_processing_sse2.ccost.ccost_mips32.ccost_neon.ccost_sse2.ccpu.hdec.cdec_mips32.cdec_mips_dsp_r2.cdec_msa.cdec_neon.cdec_sse2.cdsp.henc.cenc_mips32.cenc_mips_dsp_r2.cenc_msa.cenc_neon.cenc_sse2.cenc_sse41.cfilters.cfilters_mips_dsp_r2.cfilters_msa.cfilters_neon.cfilters_sse2.clossless.clossless.hlossless_avx2.clossless_common.hlossless_enc.clossless_enc_avx2.clossless_enc_mips32.clossless_enc_mips_dsp_r2.clossless_enc_msa.clossless_enc_neon.clossless_enc_sse2.clossless_enc_sse41.clossless_neon.clossless_sse2.clossless_sse41.c
1
.gitignore
vendored
1
.gitignore
vendored
@ -52,5 +52,6 @@ tests/fuzzer/animdecoder_fuzzer
|
||||
tests/fuzzer/animencoder_fuzzer
|
||||
tests/fuzzer/demux_api_fuzzer
|
||||
tests/fuzzer/enc_dec_fuzzer
|
||||
tests/fuzzer/huffman_fuzzer
|
||||
tests/fuzzer/mux_demux_api_fuzzer
|
||||
tests/fuzzer/simple_api_fuzzer
|
||||
|
3
AUTHORS
3
AUTHORS
@ -11,11 +11,13 @@ Contributors:
|
||||
- Christopher Degawa (ccom at randomderp dot com)
|
||||
- Clement Courbet (courbet at google dot com)
|
||||
- Djordje Pesut (djordje dot pesut at imgtec dot com)
|
||||
- Frank (1433351828 at qq dot com)
|
||||
- Frank Barchard (fbarchard at google dot com)
|
||||
- Hui Su (huisu at google dot com)
|
||||
- H. Vetinari (h dot vetinari at gmx dot com)
|
||||
- Ilya Kurdyukov (jpegqs at gmail dot com)
|
||||
- Ingvar Stepanyan (rreverser at google dot com)
|
||||
- Istvan Stefan (Istvan dot Stefan at arm dot com)
|
||||
- James Zern (jzern at google dot com)
|
||||
- Jan Engelhardt (jengelh at medozas dot de)
|
||||
- Jehan (jehan at girinstud dot io)
|
||||
@ -62,6 +64,7 @@ Contributors:
|
||||
- Vincent Rabaud (vrabaud at google dot com)
|
||||
- Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
|
||||
- Wan-Teh Chang (wtc at google dot com)
|
||||
- wrv (wrv at utexas dot edu)
|
||||
- Yang Zhang (yang dot zhang at arm dot com)
|
||||
- Yannis Guyon (yguyon at google dot com)
|
||||
- Zhi An Ng (zhin at chromium dot org)
|
||||
|
@ -9,11 +9,7 @@
|
||||
if(APPLE)
|
||||
cmake_minimum_required(VERSION 3.17)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.7)
|
||||
endif()
|
||||
|
||||
if(POLICY CMP0072)
|
||||
cmake_policy(SET CMP0072 NEW)
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
endif()
|
||||
|
||||
project(WebP C)
|
||||
@ -45,6 +41,7 @@ option(WEBP_BUILD_LIBWEBPMUX "Build the libwebpmux library." ON)
|
||||
option(WEBP_BUILD_WEBPMUX "Build the webpmux command line tool." ON)
|
||||
option(WEBP_BUILD_EXTRAS "Build extras." ON)
|
||||
option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
|
||||
option(WEBP_BUILD_FUZZTEST "Build the fuzztest tests." OFF)
|
||||
option(WEBP_USE_THREAD "Enable threading support" ON)
|
||||
option(WEBP_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
|
||||
option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces."
|
||||
@ -375,9 +372,11 @@ if(XCODE)
|
||||
endif()
|
||||
target_link_libraries(webpdecoder ${WEBP_DEP_LIBRARIES})
|
||||
target_include_directories(
|
||||
webpdecoder PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
webpdecoder
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
INTERFACE
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR};${CMAKE_CURRENT_BINARY_DIR}>"
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
set_target_properties(
|
||||
webpdecoder
|
||||
PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h;\
|
||||
@ -479,6 +478,7 @@ if(WEBP_BUILD_ANIM_UTILS
|
||||
OR WEBP_BUILD_CWEBP
|
||||
OR WEBP_BUILD_DWEBP
|
||||
OR WEBP_BUILD_EXTRAS
|
||||
OR WEBP_BUILD_FUZZTEST
|
||||
OR WEBP_BUILD_GIF2WEBP
|
||||
OR WEBP_BUILD_IMG2WEBP
|
||||
OR WEBP_BUILD_VWEBP
|
||||
@ -563,7 +563,8 @@ if(WEBP_BUILD_GIF2WEBP)
|
||||
add_executable(gif2webp ${GIF2WEBP_SRCS})
|
||||
target_link_libraries(gif2webp exampleutil imageioutil webp libwebpmux
|
||||
${WEBP_DEP_GIF_LIBRARIES})
|
||||
target_include_directories(gif2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
target_include_directories(gif2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
install(TARGETS gif2webp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()
|
||||
|
||||
@ -771,6 +772,10 @@ if(WEBP_BUILD_ANIM_UTILS)
|
||||
target_include_directories(anim_dump PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
endif()
|
||||
|
||||
if(WEBP_BUILD_FUZZTEST)
|
||||
add_subdirectory(tests/fuzzer)
|
||||
endif()
|
||||
|
||||
# Install the different headers and libraries.
|
||||
install(
|
||||
TARGETS ${INSTALLED_LIBRARIES}
|
||||
|
141
ChangeLog
141
ChangeLog
@ -1,3 +1,144 @@
|
||||
c3d85ce4 update NEWS
|
||||
ad14e811 tests/fuzzer/*: add missing <string_view> include
|
||||
74cd026e fuzz_utils.cc: fix build error w/WEBP_REDUCE_SIZE
|
||||
a027aa93 mux_demux_api_fuzzer.cc: fix -Wshadow warning
|
||||
25e17c68 update ChangeLog (tag: v1.5.0-rc1)
|
||||
aa2684fc update NEWS
|
||||
36923846 bump version to 1.5.0
|
||||
ceea8ff6 update AUTHORS
|
||||
e4f7a9f0 img2webp: add a warning for unused options
|
||||
1b4c967f Merge "Properly check the data size against the end of the RIFF chunk" into main
|
||||
9e5ecfaf Properly check the data size against the end of the RIFF chunk
|
||||
da0d9c7d examples: exit w/failure w/no args
|
||||
fcff86c7 {gif,img}2webp: sync -m help w/cwebp
|
||||
b76c4a84 man/img2webp.1: sync -m text w/cwebp.1 & gif2webp.1
|
||||
30633519 muxread: fix reading of buffers > riff size
|
||||
4c85d860 yuv.h: update RGB<->YUV coefficients in comment
|
||||
0ab789e0 Merge changes I6dfedfd5,I2376e2dc into main
|
||||
03236450 {ios,xcframework}build.sh: fix compilation w/Xcode 16
|
||||
61e2cfda rework AddVectorEq_SSE2
|
||||
7bda3deb rework AddVector_SSE2
|
||||
2ddaaf0a Fix variable names in SharpYuvComputeConversionMatrix
|
||||
a3ba6f19 Makefile.vc: fix gif2webp link error
|
||||
f999d94f gif2webp: add -sharp_yuv/-near_lossless
|
||||
dfdcb7f9 Merge "lossless.h: fix function declaration mismatches" into main (tag: webp-rfc9649)
|
||||
78ed6839 fix overread in Intra4Preds_NEON
|
||||
d516a68e lossless.h: fix function declaration mismatches
|
||||
87406904 Merge "Improve documentation of SharpYuvConversionMatrix." into main
|
||||
fdb229ea Merge changes I07a7e36a,Ib29980f7,I2316122d,I2356e314,I32b53dd3, ... into main
|
||||
0c3cd9cc Improve documentation of SharpYuvConversionMatrix.
|
||||
169dfbf9 disable Intra4Preds_NEON
|
||||
2dd5eb98 dsp/yuv*: use WEBP_RESTRICT qualifier
|
||||
23bbafbe dsp/upsampling*: use WEBP_RESTRICT qualifier
|
||||
35915b38 dsp/rescaler*: use WEBP_RESTRICT qualifier
|
||||
a32b436b dsp/lossless*: use WEBP_RESTRICT qualifier
|
||||
04d4b4f3 dsp/filters*: use WEBP_RESTRICT qualifier
|
||||
b1cb37e6 dsp/enc*: use WEBP_RESTRICT qualifier
|
||||
201894ef dsp/dec*: use WEBP_RESTRICT qualifier
|
||||
02eac8a7 dsp/cost*: use WEBP_RESTRICT qualifier
|
||||
84b118c9 Merge "webp-container-spec: normalize notes & unknown chunk link" into main
|
||||
052cf42f webp-container-spec: normalize notes & unknown chunk link
|
||||
220ee529 Search for best predictor transform bits
|
||||
78619478 Try to reduce the sampling for the entropy image
|
||||
14f09ab7 webp-container-spec: reorder chunk size - N text
|
||||
a78c5356 Remove a useless malloc for entropy image
|
||||
bc491763 Merge "Refactor predictor finding" into main
|
||||
34f92238 man/{cwebp,img2webp}.1: rm 'if needed' from -sharp_yuv
|
||||
367ca938 Refactor predictor finding
|
||||
a582b53b webp-lossless-bitstream-spec: clarify some text
|
||||
0fd25d84 Merge "anim_encode.c: fix function ref in comment" into main
|
||||
f8882913 anim_encode.c: fix function ref in comment
|
||||
40e4ca60 specs_generation.md: update kramdown command line
|
||||
57883c78 img2webp: add -exact/-noexact per-frame options
|
||||
1c8eba97 img2webp,cosmetics: add missing '.' spacers to help
|
||||
2e81017c Convert predictor_enc.c to fixed point
|
||||
94de6c7f Merge "Fix fuzztest link errors w/-DBUILD_SHARED_LIBS=1" into main
|
||||
51d9832a Fix fuzztest link errors w/-DBUILD_SHARED_LIBS=1
|
||||
7bcb36b8 Merge "Fix static overflow warning." into main
|
||||
8e0cc14c Fix static overflow warning.
|
||||
cea68462 README.md: add security report note
|
||||
615e5874 Merge "make VP8LPredictor[01]_C() static" into main
|
||||
233e86b9 Merge changes Ie43dc5ef,I94cd8bab into main
|
||||
1a29fd2f make VP8LPredictor[01]_C() static
|
||||
dd9d3770 Do*Filter_*: remove row & num_rows parameters
|
||||
ab451a49 Do*Filter_C: remove dead 'inverse' code paths
|
||||
f9a480f7 {TrueMotion,TM16}_NEON: remove zero extension
|
||||
04834aca Merge changes I25c30a9e,I0a192fc6,I4cf89575 into main
|
||||
39a602af webp-lossless-bitstream-spec: normalize predictor transform ref
|
||||
f28c837d Merge "webp-container-spec: align anim pseudocode w/prose" into main
|
||||
74be8e22 Fix implicit conversion issues
|
||||
0c01db7c Merge "Increase the transform bits if possible." into main
|
||||
f2d6dc1e Increase the transform bits if possible.
|
||||
caa19e5b update link to issue tracker
|
||||
c9dd9bd4 webp-container-spec: align anim pseudocode w/prose
|
||||
8a7c8dc6 WASM: Enable VP8L_USE_FAST_LOAD
|
||||
f0c53cd9 WASM: don't use USE_GENERIC_TREE
|
||||
eef903d0 WASM: Enable 64-bit BITS caching
|
||||
6296cc8d iterator_enc: make VP8IteratorReset() static
|
||||
fbd93896 histogram_enc: make VP8LGetHistogramSize static
|
||||
cc7ff545 cost_enc: make VP8CalculateLevelCosts[] static
|
||||
4e2828ba vp8l_dec: make VP8LClear() static
|
||||
d742b24a Intra16Preds_NEON: fix truemotion saturation
|
||||
c7bb4cb5 Intra4Preds_NEON: fix truemotion saturation
|
||||
952a989b Merge "Remove TODO now that log is using fixed point." into main
|
||||
dde11574 Remove TODO now that log is using fixed point.
|
||||
a1ca153d Fix hidden myerr in my_error_exit
|
||||
3bd94202 Merge changes Iff6e47ed,I24c67cd5,Id781e761 into main
|
||||
d27d246e Merge "Convert VP8LFastSLog2 to fixed point" into main
|
||||
4838611f Disable msg_code use in fuzzing mode
|
||||
314a142a Use QuantizeBlock_NEON for VP8EncQuantizeBlockWHT on Arm
|
||||
3bfb05e3 Add AArch64 Neon implementation of Intra16Preds
|
||||
baa93808 Add AArch64 Neon implementation of Intra4Preds
|
||||
41a5e582 Fix errors when compiling code as C++
|
||||
fb444b69 Convert VP8LFastSLog2 to fixed point
|
||||
c1c89f51 Fix WEBP_NODISCARD comment and C++ version
|
||||
66408c2c Switch the histogram_enc.h API to fixed point
|
||||
ac1e410d Remove leftover tiff dep
|
||||
b78d3957 Disable TIFF on fuzztest.
|
||||
cff21a7d Do not build statically on oss-fuzz.
|
||||
6853a8e5 Merge "Move more internal fuzzers to public." into main
|
||||
9bc09db4 Merge "Convert VP8LFastLog2 to fixed point" into main
|
||||
0a9f1c19 Convert VP8LFastLog2 to fixed point
|
||||
db0cb9c2 Move more internal fuzzers to public.
|
||||
ff2b5b15 Merge "advanced_api_fuzzer.cc: use crop dims in OOM check" into main
|
||||
c4af79d0 Put 0 at the end of a palette and do not store it.
|
||||
0ec80aef Delete last references to delta palettization
|
||||
96d79f84 advanced_api_fuzzer.cc: use crop dims in OOM check
|
||||
c35c7e02 Fix huffman fuzzer to not leak.
|
||||
f2fe8dec Bump fuzztest dependency.
|
||||
9ce982fd Fix fuzz tests to work on oss-fuzz
|
||||
3ba8af1a Do not escape quotes anymore in build.sh
|
||||
ea0e121b Allow centipede to be used as a fuzzing engine.
|
||||
27731afd make VP8I4ModeOffsets & VP8MakeIntra4Preds static
|
||||
ddd6245e oss-fuzz/build.sh: use heredoc for script creation
|
||||
50074930 oss-fuzz/build.sh,cosmetics: fix indent
|
||||
20e92f7d Limit the possible fuzz engines.
|
||||
4f200de5 Switch public fuzz tests to fuzztest.
|
||||
64186bb3 Add huffman_fuzzer to .gitignore
|
||||
0905f61c Move build script from oss-fuzz repo to here.
|
||||
e8678758 Fix link to Javascript documentation
|
||||
5e5b8f0c Fix SSE2 Transform_AC3 function name
|
||||
45129ee0 Revert "Check all the rows."
|
||||
ee26766a Check all the rows.
|
||||
7ec51c59 Increase the transform bits if possible.
|
||||
3cd16fd3 Revert "Increase the transform bits if possible."
|
||||
971a03d8 Increase the transform bits if possible.
|
||||
1bf198a2 Allow transform_bits to be different during encoding.
|
||||
1e462ca8 Define MAX_TRANSFORM_BITS according to the specification.
|
||||
64d1ec23 Use (MIN/NUM)_(TRANSFORM/HUFFMAN)_BITS where appropriate
|
||||
a90160e1 Refactor histograms in predictors.
|
||||
a7aa7525 Fix some function declarations
|
||||
68ff4e1e Merge "jpegdec: add a hint for EOF/READ errors" into main
|
||||
79e7968a jpegdec: add a hint for EOF/READ errors
|
||||
d33455cd man/*: s/BUGS/REPORTING BUGS/
|
||||
a67ff735 normalize example exit status
|
||||
edc28909 upsampling_{neon,sse41}: fix int sanitizer warning
|
||||
3cada4ce ImgIoUtilReadFile: check ftell() return
|
||||
dc950585 Merge tag 'v1.4.0'
|
||||
845d5476 update ChangeLog (tag: v1.4.0, origin/1.4.0)
|
||||
8a6a55bb update NEWS
|
||||
cf7c5a5d provide a way to opt-out/override WEBP_NODISCARD
|
||||
cc34288a update ChangeLog (tag: v1.4.0-rc1)
|
||||
f13c0886 NEWS: fix date
|
||||
74555950 Merge "vwebp: fix window title when options are given" into 1.4.0
|
||||
d781646c vwebp: fix window title when options are given
|
||||
|
@ -32,7 +32,7 @@ PLATFORM_LDFLAGS = /SAFESEH
|
||||
NOLOGO = /nologo
|
||||
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
|
||||
CCDEBUG = cl.exe $(NOLOGO) /Od /Zi /D_DEBUG /RTC1
|
||||
CFLAGS = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
|
||||
CFLAGS = /I. /Isrc $(NOLOGO) /MP /W3 /EHsc /c
|
||||
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
|
||||
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST:EMBED /NXCOMPAT /DYNAMICBASE
|
||||
LDFLAGS = $(LDFLAGS) $(PLATFORM_LDFLAGS)
|
||||
@ -231,6 +231,7 @@ DSP_DEC_OBJS = \
|
||||
$(DIROBJ)\dsp\lossless_neon.obj \
|
||||
$(DIROBJ)\dsp\lossless_sse2.obj \
|
||||
$(DIROBJ)\dsp\lossless_sse41.obj \
|
||||
$(DIROBJ)\dsp\lossless_avx2.obj \
|
||||
$(DIROBJ)\dsp\rescaler.obj \
|
||||
$(DIROBJ)\dsp\rescaler_mips32.obj \
|
||||
$(DIROBJ)\dsp\rescaler_mips_dsp_r2.obj \
|
||||
@ -270,6 +271,7 @@ DSP_ENC_OBJS = \
|
||||
$(DIROBJ)\dsp\lossless_enc_neon.obj \
|
||||
$(DIROBJ)\dsp\lossless_enc_sse2.obj \
|
||||
$(DIROBJ)\dsp\lossless_enc_sse41.obj \
|
||||
$(DIROBJ)\dsp\lossless_enc_avx2.obj \
|
||||
$(DIROBJ)\dsp\ssim.obj \
|
||||
$(DIROBJ)\dsp\ssim_sse2.obj \
|
||||
|
||||
@ -393,7 +395,7 @@ $(DIRBIN)\dwebp.exe: $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\dwebp.exe: $(LIBWEBPDEMUX)
|
||||
$(DIRBIN)\gif2webp.exe: $(DIROBJ)\examples\gif2webp.obj $(EX_GIF_DEC_OBJS)
|
||||
$(DIRBIN)\gif2webp.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBPMUX)
|
||||
$(DIRBIN)\gif2webp.exe: $(LIBWEBP)
|
||||
$(DIRBIN)\gif2webp.exe: $(LIBWEBP) $(LIBSHARPYUV)
|
||||
$(DIRBIN)\vwebp.exe: $(DIROBJ)\examples\vwebp.obj $(EX_UTIL_OBJS)
|
||||
$(DIRBIN)\vwebp.exe: $(IMAGEIO_UTIL_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\vwebp_sdl.exe: $(DIROBJ)\extras\vwebp_sdl.obj
|
||||
|
26
NEWS
26
NEWS
@ -1,4 +1,26 @@
|
||||
- 4/2/2024: version 1.4.0
|
||||
- 12/19/2024 version 1.5.0
|
||||
This is a binary compatible release.
|
||||
API changes:
|
||||
- `cross_color_transform_bits` added to WebPAuxStats
|
||||
* minor lossless encoder speed and compression improvements
|
||||
* lossless encoding does not use floats anymore
|
||||
* additional Arm optimizations for lossy & lossless + general code generation
|
||||
improvements
|
||||
* improvements to WASM performance (#643)
|
||||
* improvements and corrections in webp-container-spec.txt and
|
||||
webp-lossless-bitstream-spec.txt (#646, #355607636)
|
||||
* further security related hardening and increased fuzzing coverage w/fuzztest
|
||||
(oss-fuzz: #382816119, #70112, #70102, #69873, #69825, #69508, #69208)
|
||||
* miscellaneous warning, bug & build fixes (#499, #562, #381372617,
|
||||
#381109771, #42340561, #375011696, #372109644, chromium: #334120888)
|
||||
Tool updates:
|
||||
* gif2webp: add -sharp_yuv & -near_lossless
|
||||
* img2webp: add -exact & -noexact
|
||||
* exit codes normalized; running an example program with no
|
||||
arguments will output its help and exit with an error (#42340557,
|
||||
#381372617)
|
||||
|
||||
- 4/12/2024: version 1.4.0
|
||||
This is a binary compatible release.
|
||||
* API changes:
|
||||
- libwebpmux: WebPAnimEncoderSetChunk, WebPAnimEncoderGetChunk,
|
||||
@ -7,6 +29,8 @@
|
||||
- extras: SharpYuvEstimate420Risk
|
||||
* further security related hardening in libwebp & examples
|
||||
* some minor optimizations in the lossless encoder
|
||||
* added WEBP_NODISCARD to report unused result warnings; enable with
|
||||
-DWEBP_ENABLE_NODISCARD=1
|
||||
* improvements and corrections in webp-container-spec.txt and
|
||||
webp-lossless-bitstream-spec.txt (#611)
|
||||
* miscellaneous warning, bug & build fixes (#615, #619, #632, #635)
|
||||
|
@ -7,7 +7,7 @@
|
||||
\__\__/\____/\_____/__/ ____ ___
|
||||
/ _/ / \ \ / _ \/ _/
|
||||
/ \_/ / / \ \ __/ \__
|
||||
\____/____/\_____/_____/____/v1.4.0
|
||||
\____/____/\_____/_____/____/v1.5.0
|
||||
```
|
||||
|
||||
WebP codec is a library to encode and decode images in WebP format. This package
|
||||
@ -42,7 +42,8 @@ See the [APIs documentation](doc/api.md), and API usage examples in the
|
||||
|
||||
## Bugs
|
||||
|
||||
Please report all bugs to the issue tracker: https://bugs.chromium.org/p/webp
|
||||
Please report all bugs to the [issue tracker](https://issues.webmproject.org).
|
||||
For security reports, select 'Security report' from the Template dropdown.
|
||||
|
||||
Patches welcome! See [how to contribute](CONTRIBUTING.md).
|
||||
|
||||
|
@ -94,6 +94,9 @@
|
||||
/* Set to 1 if SSE4.1 is supported */
|
||||
#cmakedefine WEBP_HAVE_SSE41 1
|
||||
|
||||
/* Set to 1 if AVX2 is supported */
|
||||
#cmakedefine WEBP_HAVE_AVX2 1
|
||||
|
||||
/* Set to 1 if TIFF library is installed */
|
||||
#cmakedefine WEBP_HAVE_TIFF 1
|
||||
|
||||
|
@ -38,9 +38,9 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
|
||||
endfunction()
|
||||
|
||||
# those are included in the names of WEBP_USE_* in c++ code.
|
||||
set(WEBP_SIMD_FLAGS "SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
|
||||
set(WEBP_SIMD_FLAGS "AVX2;SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
|
||||
set(WEBP_SIMD_FILE_EXTENSIONS
|
||||
"_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
|
||||
"_avx2.c;_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
|
||||
if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
# With at least Visual Studio 12 (2013)+ /arch is not necessary to build SSE2
|
||||
# or SSE4 code unless a lesser /arch is forced. MSVC does not have a SSE4
|
||||
@ -50,12 +50,12 @@ if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
if(MSVC_VERSION GREATER_EQUAL 1800 AND NOT CMAKE_C_FLAGS MATCHES "/arch:")
|
||||
set(SIMD_ENABLE_FLAGS)
|
||||
else()
|
||||
set(SIMD_ENABLE_FLAGS "/arch:AVX;/arch:SSE2;;;;")
|
||||
set(SIMD_ENABLE_FLAGS "/arch:AVX2;/arch:AVX;/arch:SSE2;;;;")
|
||||
endif()
|
||||
set(SIMD_DISABLE_FLAGS)
|
||||
else()
|
||||
set(SIMD_ENABLE_FLAGS "-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
|
||||
set(SIMD_DISABLE_FLAGS "-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
|
||||
set(SIMD_ENABLE_FLAGS "-mavx2;-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
|
||||
set(SIMD_DISABLE_FLAGS "-mno-avx2;-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
|
||||
endif()
|
||||
|
||||
set(WEBP_SIMD_FILES_TO_INCLUDE)
|
||||
|
23
configure.ac
23
configure.ac
@ -1,5 +1,5 @@
|
||||
AC_INIT([libwebp], [1.4.0],
|
||||
[https://bugs.chromium.org/p/webp],,
|
||||
AC_INIT([libwebp], [1.5.0],
|
||||
[https://issues.webmproject.org],,
|
||||
[https://developers.google.com/speed/webp])
|
||||
AC_CANONICAL_HOST
|
||||
AC_PREREQ([2.60])
|
||||
@ -161,6 +161,25 @@ AS_IF([test "$GCC" = "yes" ], [
|
||||
AC_SUBST([AM_CFLAGS])
|
||||
|
||||
dnl === Check for machine specific flags
|
||||
AC_ARG_ENABLE([avx2],
|
||||
AS_HELP_STRING([--disable-avx2],
|
||||
[Disable detection of AVX2 support
|
||||
@<:@default=auto@:>@]))
|
||||
|
||||
AS_IF([test "x$enable_avx2" != "xno" -a "x$enable_sse4_1" != "xno"
|
||||
-a "x$enable_sse2" != "xno"], [
|
||||
AVX2_FLAGS="$INTRINSICS_CFLAGS $AVX2_FLAGS"
|
||||
TEST_AND_ADD_CFLAGS([AVX2_FLAGS], [-mavx2])
|
||||
AS_IF([test -n "$AVX2_FLAGS"], [
|
||||
SAVED_CFLAGS=$CFLAGS
|
||||
CFLAGS="$CFLAGS $AVX2_FLAGS"
|
||||
AC_CHECK_HEADER([immintrin.h],
|
||||
[AC_DEFINE(WEBP_HAVE_AVX2, [1],
|
||||
[Set to 1 if AVX2 is supported])],
|
||||
[AVX2_FLAGS=""])
|
||||
CFLAGS=$SAVED_CFLAGS])
|
||||
AC_SUBST([AVX2_FLAGS])])
|
||||
|
||||
AC_ARG_ENABLE([sse4.1],
|
||||
AS_HELP_STRING([--disable-sse4.1],
|
||||
[Disable detection of SSE4.1 support
|
||||
|
@ -228,4 +228,4 @@ generated code, but is untested.
|
||||
## Javascript decoder
|
||||
|
||||
Libwebp can be compiled into a JavaScript decoder using Emscripten and CMake.
|
||||
See the [corresponding documentation](../README.md)
|
||||
See the [corresponding documentation](../webp_js/README.md)
|
||||
|
@ -17,10 +17,11 @@ rubygems will install automatically. The following will apply inline CSS
|
||||
styling; an external stylesheet is not needed.
|
||||
|
||||
```shell
|
||||
$ kramdown doc/webp-lossless-bitstream-spec.txt --template \
|
||||
doc/template.html --coderay-css style --coderay-line-numbers ' ' \
|
||||
--coderay-default-lang c > \
|
||||
doc/output/webp-lossless-bitstream-spec.html
|
||||
$ kramdown doc/webp-lossless-bitstream-spec.txt \
|
||||
--template doc/template.html \
|
||||
-x syntax-coderay --syntax-highlighter coderay \
|
||||
--syntax-highlighter-opts "{default_lang: c, line_numbers: , css: style}" \
|
||||
> doc/output/webp-lossless-bitstream-spec.html
|
||||
```
|
||||
|
||||
Optimally, use kramdown 0.13.7 or newer if syntax highlighting desired.
|
||||
|
15
doc/tools.md
15
doc/tools.md
@ -321,10 +321,13 @@ Per-frame options (only used for subsequent images input):
|
||||
|
||||
```
|
||||
-d <int> ............. frame duration in ms (default: 100)
|
||||
-lossless ........... use lossless mode (default)
|
||||
-lossy ... ........... use lossy mode
|
||||
-lossless ............ use lossless mode (default)
|
||||
-lossy ............... use lossy mode
|
||||
-q <float> ........... quality
|
||||
-m <int> ............. method to use
|
||||
-m <int> ............. compression method (0=fast, 6=slowest), default=4
|
||||
-exact, -noexact ..... preserve or alter RGB values in transparent area
|
||||
(default: -noexact, may cause artifacts
|
||||
with lossy animations)
|
||||
```
|
||||
|
||||
example: `img2webp -loop 2 in0.png -lossy in1.jpg -d 80 in2.tiff -o out.webp`
|
||||
@ -351,8 +354,12 @@ Options:
|
||||
-lossy ................. encode image using lossy compression
|
||||
-mixed ................. for each frame in the image, pick lossy
|
||||
or lossless compression heuristically
|
||||
-near_lossless <int> ... use near-lossless image preprocessing
|
||||
(0..100=off), default=100
|
||||
-sharp_yuv ............. use sharper (and slower) RGB->YUV conversion
|
||||
(lossy only)
|
||||
-q <float> ............. quality factor (0:small..100:big)
|
||||
-m <int> ............... compression method (0=fast, 6=slowest)
|
||||
-m <int> ............... compression method (0=fast, 6=slowest), default=4
|
||||
-min_size .............. minimize output size (default:off)
|
||||
lossless compression by default; can be
|
||||
combined with -q, -m, -lossy or -mixed
|
||||
|
@ -131,7 +131,7 @@ Chunk Payload: _Chunk Size_ bytes
|
||||
: The data payload. If _Chunk Size_ is odd, a single padding byte -- which MUST
|
||||
be `0` to conform with RIFF -- is added.
|
||||
|
||||
**Note:** RIFF has a convention that all-uppercase chunk FourCCs are standard
|
||||
**Note**: RIFF has a convention that all-uppercase chunk FourCCs are standard
|
||||
chunks that apply to any RIFF file format, while FourCCs specific to a file
|
||||
format are all lowercase. WebP does not follow this convention.
|
||||
|
||||
@ -220,7 +220,7 @@ use another conversion method, but visual results may differ among decoders.
|
||||
Simple File Format (Lossless)
|
||||
-----------------------------
|
||||
|
||||
**Note:** Older readers may not support files using the lossless format.
|
||||
**Note**: Older readers may not support files using the lossless format.
|
||||
|
||||
This layout SHOULD be used if the image requires _lossless_ encoding (with an
|
||||
optional transparency channel) and does not require advanced features provided
|
||||
@ -262,7 +262,7 @@ and height of the canvas.
|
||||
Extended File Format
|
||||
--------------------
|
||||
|
||||
**Note:** Older readers may not support files using the extended format.
|
||||
**Note**: Older readers may not support files using the extended format.
|
||||
|
||||
An extended format file consists of:
|
||||
|
||||
@ -290,12 +290,12 @@ up of:
|
||||
For an _animated image_, the _image data_ consists of multiple frames. More
|
||||
details about frames can be found in the [Animation](#animation) section.
|
||||
|
||||
All chunks necessary for reconstruction and color correction, that is 'VP8X',
|
||||
'ICCP', 'ANIM', 'ANMF', 'ALPH', 'VP8 ' and 'VP8L', MUST appear in the order
|
||||
All chunks necessary for reconstruction and color correction, that is, 'VP8X',
|
||||
'ICCP', 'ANIM', 'ANMF', 'ALPH', 'VP8 ', and 'VP8L', MUST appear in the order
|
||||
described earlier. Readers SHOULD fail when chunks necessary for reconstruction
|
||||
and color correction are out of order.
|
||||
|
||||
[Metadata](#metadata) and [unknown](#unknown-chunks) chunks MAY appear out of
|
||||
[Metadata](#metadata) and [unknown chunks](#unknown-chunks) MAY appear out of
|
||||
order.
|
||||
|
||||
**Rationale:** The chunks necessary for reconstruction should appear first in
|
||||
@ -401,7 +401,7 @@ Background Color: 32 bits (_uint32_)
|
||||
around the frames, as well as the transparent pixels of the first frame.
|
||||
The background color is also used when the Disposal method is `1`.
|
||||
|
||||
**Note**:
|
||||
**Notes**:
|
||||
|
||||
* The background color MAY contain a non-opaque alpha value, even if the
|
||||
_Alpha_ flag in the ['VP8X' Chunk](#extended_header) is unset.
|
||||
@ -525,7 +525,7 @@ Disposal method (D): 1 bit
|
||||
not present, standard RGB (sRGB) is to be assumed. (Note that sRGB also
|
||||
needs to be linearized due to a gamma of ~2.2.)
|
||||
|
||||
Frame Data: _Chunk Size_ - `16` bytes
|
||||
Frame Data: _Chunk Size_ bytes - `16`
|
||||
|
||||
: Consists of:
|
||||
|
||||
@ -616,7 +616,7 @@ Compression method (C): 2 bits
|
||||
* `0`: No compression.
|
||||
* `1`: Compressed using the WebP lossless format.
|
||||
|
||||
Alpha bitstream: _Chunk Size_ - `1` bytes
|
||||
Alpha bitstream: _Chunk Size_ bytes - `1`
|
||||
|
||||
: Encoded alpha bitstream.
|
||||
|
||||
@ -781,7 +781,8 @@ _VP8X.field_ means the field in the 'VP8X' Chunk with the same description.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
VP8X.flags.hasAnimation MUST be TRUE
|
||||
canvas ← new image of size VP8X.canvasWidth x VP8X.canvasHeight with
|
||||
background color ANIM.background_color.
|
||||
background color ANIM.background_color or
|
||||
application-defined color.
|
||||
loop_count ← ANIM.loopCount
|
||||
dispose_method ← Dispose to background color
|
||||
if loop_count == 0:
|
||||
@ -809,6 +810,7 @@ for loop = 0..loop_count - 1
|
||||
bitstream subchunks not found in 'Frame Data' earlier MUST
|
||||
be TRUE
|
||||
frame_params.bitstream = bitstream_data
|
||||
apply dispose_method.
|
||||
render frame with frame_params.alpha and frame_params.bitstream
|
||||
on canvas with top-left corner at (frame_params.frameX,
|
||||
frame_params.frameY), using Blending method
|
||||
|
@ -351,7 +351,7 @@ int ClampAddSubtractHalf(int a, int b) {
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are special handling rules for some border pixels. If there is a
|
||||
prediction transform, regardless of the mode \[0..13\] for these pixels, the
|
||||
predictor transform, regardless of the mode \[0..13\] for these pixels, the
|
||||
predicted value for the left-topmost pixel of the image is 0xff000000, all
|
||||
pixels on the top row are L-pixel, and all pixels on the leftmost column are
|
||||
T-pixel.
|
||||
@ -436,8 +436,8 @@ should be interpreted as an 8-bit two's complement number (that is: uint8 range
|
||||
|
||||
The multiplication is to be done using more precision (with at least 16-bit
|
||||
precision). The sign extension property of the shift operation does not matter
|
||||
here; only the lowest 8 bits are used from the result, and there the sign
|
||||
extension shifting and unsigned shifting are consistent with each other.
|
||||
here; only the lowest 8 bits are used from the result, and in these bits, the
|
||||
sign extension shifting and unsigned shifting are consistent with each other.
|
||||
|
||||
Now, we describe the contents of color transform data so that decoding can apply
|
||||
the inverse color transform and recover the original red and blue values. The
|
||||
@ -613,8 +613,8 @@ We use image data in five different roles:
|
||||
1. Color transform image: Created by `ColorTransformElement` values
|
||||
(defined in ["Color Transform"](#color-transform)) for different blocks of
|
||||
the image.
|
||||
1. Color indexing image: An array of size `color_table_size` (up to 256 ARGB
|
||||
values) storing the metadata for the color indexing transform (see
|
||||
1. Color indexing image: An array of the size of `color_table_size` (up to
|
||||
256 ARGB values) that stores metadata for the color indexing transform (see
|
||||
["Color Indexing Transform"](#color-indexing-transform)).
|
||||
|
||||
### 5.2 Encoding of Image Data
|
||||
|
@ -67,7 +67,7 @@ dwebp_LDADD += ../src/libwebp.la
|
||||
dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)
|
||||
|
||||
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES)
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES) -I$(top_srcdir)
|
||||
gif2webp_LDADD =
|
||||
gif2webp_LDADD += libexample_util.la
|
||||
gif2webp_LDADD += ../imageio/libimageio_util.la
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> // for 'strtod'.
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for 'strcmp'.
|
||||
|
||||
#include "./anim_util.h"
|
||||
@ -206,8 +206,9 @@ static void Help(void) {
|
||||
printf(" -version ............ print version number and exit\n");
|
||||
}
|
||||
|
||||
// Returns 0 on success, 1 if animation files differ, and 2 for any error.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int return_code = -1;
|
||||
int return_code = 2;
|
||||
int dump_frames = 0;
|
||||
const char* dump_folder = NULL;
|
||||
double min_psnr = 0.;
|
||||
@ -269,18 +270,18 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
if (parse_error) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(return_code);
|
||||
}
|
||||
}
|
||||
if (argc < 3) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(return_code);
|
||||
}
|
||||
|
||||
|
||||
if (!got_input2) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(return_code);
|
||||
}
|
||||
|
||||
if (dump_frames) {
|
||||
@ -293,7 +294,7 @@ int main(int argc, const char* argv[]) {
|
||||
if (!ReadAnimatedImage(files[i], &images[i], dump_frames, dump_folder)) {
|
||||
WFPRINTF(stderr, "Error decoding file: %s\n Aborting.\n",
|
||||
(const W_CHAR*)files[i]);
|
||||
return_code = -2;
|
||||
return_code = 2;
|
||||
goto End;
|
||||
} else {
|
||||
MinimizeAnimationFrames(&images[i], max_diff);
|
||||
@ -304,7 +305,7 @@ int main(int argc, const char* argv[]) {
|
||||
premultiply, min_psnr)) {
|
||||
WFPRINTF(stderr, "\nFiles %s and %s differ.\n", (const W_CHAR*)files[0],
|
||||
(const W_CHAR*)files[1]);
|
||||
return_code = -3;
|
||||
return_code = 1;
|
||||
} else {
|
||||
WPRINTF("\nFiles %s and %s are identical.\n", (const W_CHAR*)files[0],
|
||||
(const W_CHAR*)files[1]);
|
||||
|
@ -12,6 +12,7 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for 'strcmp'.
|
||||
|
||||
#include "./anim_util.h"
|
||||
@ -35,6 +36,7 @@ static void Help(void) {
|
||||
printf(" -version ............ print version number and exit\n");
|
||||
}
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int error = 0;
|
||||
const W_CHAR* dump_folder = TO_W_CHAR(".");
|
||||
@ -47,7 +49,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc < 2) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; !error && c < argc; ++c) {
|
||||
@ -73,7 +75,7 @@ int main(int argc, const char* argv[]) {
|
||||
suffix = TO_W_CHAR("pam");
|
||||
} else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
int dec_version, demux_version;
|
||||
GetAnimatedImageVersions(&dec_version, &demux_version);
|
||||
@ -82,7 +84,7 @@ int main(int argc, const char* argv[]) {
|
||||
(dec_version >> 0) & 0xff,
|
||||
(demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
|
||||
(demux_version >> 0) & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else {
|
||||
uint32_t i;
|
||||
AnimatedImage image;
|
||||
@ -121,5 +123,5 @@ int main(int argc, const char* argv[]) {
|
||||
ClearAnimatedImage(&image);
|
||||
}
|
||||
}
|
||||
FREE_WARGV_AND_RETURN(error ? 1 : 0);
|
||||
FREE_WARGV_AND_RETURN(error ? EXIT_FAILURE : EXIT_SUCCESS);
|
||||
}
|
||||
|
@ -771,6 +771,7 @@ void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
|
||||
*psnr = 99.; // PSNR when images are identical.
|
||||
} else {
|
||||
sse /= stride * height;
|
||||
assert(sse != 0.0);
|
||||
*psnr = 4.3429448 * log(255. * 255. / sse);
|
||||
}
|
||||
}
|
||||
|
@ -178,8 +178,14 @@ static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
|
||||
if (stats->lossless_features & 8) fprintf(stderr, " PALETTE");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
fprintf(stderr, " * Precision Bits: histogram=%d transform=%d cache=%d\n",
|
||||
stats->histogram_bits, stats->transform_bits, stats->cache_bits);
|
||||
fprintf(stderr, " * Precision Bits: histogram=%d", stats->histogram_bits);
|
||||
if (stats->lossless_features & 1) {
|
||||
fprintf(stderr, " prediction=%d", stats->transform_bits);
|
||||
}
|
||||
if (stats->lossless_features & 2) {
|
||||
fprintf(stderr, " cross-color=%d", stats->cross_color_transform_bits);
|
||||
}
|
||||
fprintf(stderr, " cache=%d\n", stats->cache_bits);
|
||||
if (stats->palette_size > 0) {
|
||||
fprintf(stderr, " * Palette size: %d\n", stats->palette_size);
|
||||
}
|
||||
@ -651,8 +657,9 @@ static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int return_value = -1;
|
||||
int return_value = EXIT_FAILURE;
|
||||
const char* in_file = NULL, *out_file = NULL, *dump_file = NULL;
|
||||
FILE* out = NULL;
|
||||
int c;
|
||||
@ -686,22 +693,22 @@ int main(int argc, const char* argv[]) {
|
||||
!WebPPictureInit(&original_picture) ||
|
||||
!WebPConfigInit(&config)) {
|
||||
fprintf(stderr, "Error! Version mismatch!\n");
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (argc == 1) {
|
||||
HelpShort();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
int parse_error = 0;
|
||||
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
HelpShort();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-H") || !strcmp(argv[c], "-longhelp")) {
|
||||
HelpLong();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-o") && c + 1 < argc) {
|
||||
out_file = (const char*)GET_WARGV(argv, ++c);
|
||||
} else if (!strcmp(argv[c], "-d") && c + 1 < argc) {
|
||||
@ -842,7 +849,7 @@ int main(int argc, const char* argv[]) {
|
||||
printf("libsharpyuv: %d.%d.%d\n",
|
||||
(sharpyuv_version >> 24) & 0xff, (sharpyuv_version >> 16) & 0xffff,
|
||||
sharpyuv_version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-progress")) {
|
||||
show_progress = 1;
|
||||
} else if (!strcmp(argv[c], "-quiet")) {
|
||||
@ -904,7 +911,7 @@ int main(int argc, const char* argv[]) {
|
||||
if (i == kNumTokens) {
|
||||
fprintf(stderr, "Error! Unknown metadata type '%.*s'\n",
|
||||
(int)(token - start), start);
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
start = token + 1;
|
||||
}
|
||||
@ -923,14 +930,14 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (argv[c][0] == '-') {
|
||||
fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
|
||||
HelpLong();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
} else {
|
||||
in_file = (const char*)GET_WARGV(argv, c);
|
||||
}
|
||||
|
||||
if (parse_error) {
|
||||
HelpLong();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
if (in_file == NULL) {
|
||||
@ -1231,7 +1238,7 @@ int main(int argc, const char* argv[]) {
|
||||
PrintMetadataInfo(&metadata, metadata_written);
|
||||
}
|
||||
}
|
||||
return_value = 0;
|
||||
return_value = EXIT_SUCCESS;
|
||||
|
||||
Error:
|
||||
WebPMemoryWriterClear(&memory_writer);
|
||||
|
@ -177,6 +177,7 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
|
||||
return external_buffer;
|
||||
}
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int ok = 0;
|
||||
const char* in_file = NULL;
|
||||
@ -197,14 +198,14 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (!WebPInitDecoderConfig(&config)) {
|
||||
fprintf(stderr, "Library version mismatch!\n");
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
int parse_error = 0;
|
||||
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-o") && c < argc - 1) {
|
||||
out_file = (const char*)GET_WARGV(argv, ++c);
|
||||
} else if (!strcmp(argv[c], "-alpha")) {
|
||||
@ -227,7 +228,7 @@ int main(int argc, const char* argv[]) {
|
||||
const int version = WebPGetDecoderVersion();
|
||||
printf("%d.%d.%d\n",
|
||||
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-pgm")) {
|
||||
format = PGM;
|
||||
} else if (!strcmp(argv[c], "-yuv")) {
|
||||
@ -293,21 +294,21 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (argv[c][0] == '-') {
|
||||
fprintf(stderr, "Unknown option '%s'\n", argv[c]);
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
} else {
|
||||
in_file = (const char*)GET_WARGV(argv, c);
|
||||
}
|
||||
|
||||
if (parse_error) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
if (in_file == NULL) {
|
||||
fprintf(stderr, "missing input file!!\n");
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (quiet) verbose = 0;
|
||||
@ -316,7 +317,7 @@ int main(int argc, const char* argv[]) {
|
||||
VP8StatusCode status = VP8_STATUS_OK;
|
||||
size_t data_size = 0;
|
||||
if (!LoadWebP(in_file, &data, &data_size, bitstream)) {
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
@ -415,7 +416,7 @@ int main(int argc, const char* argv[]) {
|
||||
WebPFreeDecBuffer(output_buffer);
|
||||
WebPFree((void*)external_buffer);
|
||||
WebPFree((void*)data);
|
||||
FREE_WARGV_AND_RETURN(ok ? 0 : -1);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -28,6 +28,7 @@
|
||||
#endif
|
||||
|
||||
#include <gif_lib.h>
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
#include "webp/encode.h"
|
||||
#include "webp/mux.h"
|
||||
#include "../examples/example_util.h"
|
||||
@ -70,8 +71,14 @@ static void Help(void) {
|
||||
printf(" -lossy ................. encode image using lossy compression\n");
|
||||
printf(" -mixed ................. for each frame in the image, pick lossy\n"
|
||||
" or lossless compression heuristically\n");
|
||||
printf(" -near_lossless <int> ... use near-lossless image preprocessing\n"
|
||||
" (0..100=off), default=100\n");
|
||||
printf(" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
|
||||
"conversion\n"
|
||||
" (lossy only)\n");
|
||||
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
|
||||
printf(" -m <int> ............... compression method (0=fast, 6=slowest)\n");
|
||||
printf(" -m <int> ............... compression method (0=fast, 6=slowest), "
|
||||
"default=4\n");
|
||||
printf(" -min_size .............. minimize output size (default:off)\n"
|
||||
" lossless compression by default; can be\n"
|
||||
" combined with -q, -m, -lossy or -mixed\n"
|
||||
@ -96,6 +103,7 @@ static void Help(void) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int verbose = 0;
|
||||
int gif_error = GIF_ERROR;
|
||||
@ -140,7 +148,7 @@ int main(int argc, const char* argv[]) {
|
||||
!WebPPictureInit(&frame) || !WebPPictureInit(&curr_canvas) ||
|
||||
!WebPPictureInit(&prev_canvas)) {
|
||||
fprintf(stderr, "Error! Version mismatch!\n");
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
config.lossless = 1; // Use lossless compression by default.
|
||||
|
||||
@ -150,14 +158,14 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc == 1) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
int parse_error = 0;
|
||||
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-o") && c < argc - 1) {
|
||||
out_file = GET_WARGV(argv, ++c);
|
||||
} else if (!strcmp(argv[c], "-lossy")) {
|
||||
@ -165,6 +173,10 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-mixed")) {
|
||||
enc_options.allow_mixed = 1;
|
||||
config.lossless = 0;
|
||||
} else if (!strcmp(argv[c], "-near_lossless") && c < argc - 1) {
|
||||
config.near_lossless = ExUtilGetInt(argv[++c], 0, &parse_error);
|
||||
} else if (!strcmp(argv[c], "-sharp_yuv")) {
|
||||
config.use_sharp_yuv = 1;
|
||||
} else if (!strcmp(argv[c], "-loop_compatibility")) {
|
||||
loop_compatibility = 1;
|
||||
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
|
||||
@ -216,7 +228,7 @@ int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "Error! Unknown metadata type '%.*s'\n",
|
||||
(int)(token - start), start);
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
start = token + 1;
|
||||
}
|
||||
@ -225,11 +237,14 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
const int enc_version = WebPGetEncoderVersion();
|
||||
const int mux_version = WebPGetMuxVersion();
|
||||
const int sharpyuv_version = SharpYuvGetVersion();
|
||||
printf("WebP Encoder version: %d.%d.%d\nWebP Mux version: %d.%d.%d\n",
|
||||
(enc_version >> 16) & 0xff, (enc_version >> 8) & 0xff,
|
||||
enc_version & 0xff, (mux_version >> 16) & 0xff,
|
||||
(mux_version >> 8) & 0xff, mux_version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
printf("libsharpyuv: %d.%d.%d\n", (sharpyuv_version >> 24) & 0xff,
|
||||
(sharpyuv_version >> 16) & 0xffff, sharpyuv_version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-quiet")) {
|
||||
quiet = 1;
|
||||
enc_options.verbose = 0;
|
||||
@ -242,14 +257,14 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (argv[c][0] == '-') {
|
||||
fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
} else {
|
||||
in_file = GET_WARGV(argv, c);
|
||||
}
|
||||
|
||||
if (parse_error) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
@ -593,7 +608,7 @@ int main(int argc, const char* argv[]) {
|
||||
#endif
|
||||
}
|
||||
|
||||
FREE_WARGV_AND_RETURN(!ok);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#else // !WEBP_HAVE_GIF
|
||||
@ -601,7 +616,7 @@ int main(int argc, const char* argv[]) {
|
||||
int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "GIF support not enabled in %s.\n", argv[0]);
|
||||
(void)argc;
|
||||
return 0;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -59,10 +59,15 @@ static void Help(void) {
|
||||
|
||||
printf("Per-frame options (only used for subsequent images input):\n");
|
||||
printf(" -d <int> ............. frame duration in ms (default: 100)\n");
|
||||
printf(" -lossless ........... use lossless mode (default)\n");
|
||||
printf(" -lossy ... ........... use lossy mode\n");
|
||||
printf(" -lossless ............ use lossless mode (default)\n");
|
||||
printf(" -lossy ............... use lossy mode\n");
|
||||
printf(" -q <float> ........... quality\n");
|
||||
printf(" -m <int> ............. method to use\n");
|
||||
printf(" -m <int> ............. compression method (0=fast, 6=slowest), "
|
||||
"default=4\n");
|
||||
printf(" -exact, -noexact ..... preserve or alter RGB values in transparent "
|
||||
"area\n"
|
||||
" (default: -noexact, may cause artifacts\n"
|
||||
" with lossy animations)\n");
|
||||
|
||||
printf("\n");
|
||||
printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
|
||||
@ -130,6 +135,7 @@ static int SetLoopCount(int loop_count, WebPData* const webp_data) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
const char* output = NULL;
|
||||
WebPAnimEncoder* enc = NULL;
|
||||
@ -145,13 +151,14 @@ int main(int argc, const char* argv[]) {
|
||||
WebPData webp_data;
|
||||
int c;
|
||||
int have_input = 0;
|
||||
int last_input_index = 0;
|
||||
CommandLineArguments cmd_args;
|
||||
int ok;
|
||||
|
||||
INIT_WARGV(argc, argv);
|
||||
|
||||
ok = ExUtilInitCommandLineArguments(argc - 1, argv + 1, &cmd_args);
|
||||
if (!ok) FREE_WARGV_AND_RETURN(1);
|
||||
if (!ok) FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
|
||||
argc = cmd_args.argc_;
|
||||
argv = cmd_args.argv_;
|
||||
@ -199,7 +206,7 @@ int main(int argc, const char* argv[]) {
|
||||
verbose = 1;
|
||||
} else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
const int enc_version = WebPGetEncoderVersion();
|
||||
const int mux_version = WebPGetMuxVersion();
|
||||
@ -223,6 +230,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
if (!have_input) {
|
||||
fprintf(stderr, "No input file(s) for generating animation!\n");
|
||||
ok = 0;
|
||||
Help();
|
||||
goto End;
|
||||
}
|
||||
|
||||
@ -247,6 +256,10 @@ int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "Invalid negative duration (%d)\n", duration);
|
||||
parse_error = 1;
|
||||
}
|
||||
} else if (!strcmp(argv[c], "-exact")) {
|
||||
config.exact = 1;
|
||||
} else if (!strcmp(argv[c], "-noexact")) {
|
||||
config.exact = 0;
|
||||
} else {
|
||||
parse_error = 1; // shouldn't be here.
|
||||
fprintf(stderr, "Unknown option [%s]\n", argv[c]);
|
||||
@ -267,6 +280,7 @@ int main(int argc, const char* argv[]) {
|
||||
// read next input image
|
||||
pic.use_argb = 1;
|
||||
ok = ReadImage((const char*)GET_WARGV_SHIFTED(argv, c), &pic);
|
||||
last_input_index = c;
|
||||
if (!ok) goto End;
|
||||
|
||||
if (enc == NULL) {
|
||||
@ -305,6 +319,13 @@ int main(int argc, const char* argv[]) {
|
||||
++pic_num;
|
||||
}
|
||||
|
||||
for (c = last_input_index + 1; c < argc; ++c) {
|
||||
if (argv[c] != NULL) {
|
||||
fprintf(stderr, "Warning: unused option [%s]!"
|
||||
" Frame options go before the input frame.\n", argv[c]);
|
||||
}
|
||||
}
|
||||
|
||||
// add a last fake frame to signal the last duration
|
||||
ok = ok && WebPAnimEncoderAdd(enc, NULL, timestamp_ms, NULL);
|
||||
ok = ok && WebPAnimEncoderAssemble(enc, &webp_data);
|
||||
@ -335,5 +356,5 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
WebPDataClear(&webp_data);
|
||||
ExUtilDeleteCommandLineArguments(&cmd_args);
|
||||
FREE_WARGV_AND_RETURN(ok ? 0 : 1);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
@ -506,7 +506,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
if (!WebPInitDecoderConfig(config)) {
|
||||
fprintf(stderr, "Library version mismatch!\n");
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
config->options.dithering_strength = 50;
|
||||
config->options.alpha_dithering_strength = 100;
|
||||
@ -518,7 +518,7 @@ int main(int argc, char* argv[]) {
|
||||
int parse_error = 0;
|
||||
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-noicc")) {
|
||||
kParams.use_color_profile = 0;
|
||||
} else if (!strcmp(argv[c], "-nofancy")) {
|
||||
@ -541,7 +541,7 @@ int main(int argc, char* argv[]) {
|
||||
(dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
|
||||
dec_version & 0xff, (dmux_version >> 16) & 0xff,
|
||||
(dmux_version >> 8) & 0xff, dmux_version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-mt")) {
|
||||
config->options.use_threads = 1;
|
||||
} else if (!strcmp(argv[c], "--")) {
|
||||
@ -553,7 +553,7 @@ int main(int argc, char* argv[]) {
|
||||
} else if (argv[c][0] == '-') {
|
||||
printf("Unknown option '%s'\n", argv[c]);
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
} else {
|
||||
kParams.file_name = (const char*)GET_WARGV(argv, c);
|
||||
file_name_argv_index = c;
|
||||
@ -561,14 +561,14 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
if (parse_error) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
if (kParams.file_name == NULL) {
|
||||
printf("missing input file!!\n");
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (!ImgIoUtilReadFile(kParams.file_name,
|
||||
@ -643,11 +643,11 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
// Should only be reached when using FREEGLUT:
|
||||
ClearParams();
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
|
||||
Error:
|
||||
ClearParams();
|
||||
FREE_WARGV_AND_RETURN(-1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#else // !WEBP_HAVE_GL
|
||||
@ -655,7 +655,7 @@ int main(int argc, char* argv[]) {
|
||||
int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);
|
||||
(void)argc;
|
||||
return 0;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "webp/config.h"
|
||||
@ -1120,6 +1121,7 @@ static void Help(void) {
|
||||
" -bitstream_info .... Parse bitstream header.\n");
|
||||
}
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int c, quiet = 0, show_diag = 0, show_summary = 0;
|
||||
int parse_bitstream = 0;
|
||||
@ -1130,7 +1132,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc == 1) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(WEBP_INFO_OK);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Parse command-line input.
|
||||
@ -1138,7 +1140,7 @@ int main(int argc, const char* argv[]) {
|
||||
if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help") ||
|
||||
!strcmp(argv[c], "-H") || !strcmp(argv[c], "-longhelp")) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(WEBP_INFO_OK);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-quiet")) {
|
||||
quiet = 1;
|
||||
} else if (!strcmp(argv[c], "-diag")) {
|
||||
@ -1151,7 +1153,7 @@ int main(int argc, const char* argv[]) {
|
||||
const int version = WebPGetDecoderVersion();
|
||||
printf("WebP Decoder version: %d.%d.%d\n",
|
||||
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else { // Assume the remaining are all input files.
|
||||
break;
|
||||
}
|
||||
@ -1159,7 +1161,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (c == argc) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(WEBP_INFO_INVALID_COMMAND);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Process input files one by one.
|
||||
@ -1182,5 +1184,6 @@ int main(int argc, const char* argv[]) {
|
||||
webp_info_status = AnalyzeWebP(&webp_info, &webp_data);
|
||||
WebPDataClear(&webp_data);
|
||||
}
|
||||
FREE_WARGV_AND_RETURN(webp_info_status);
|
||||
FREE_WARGV_AND_RETURN((webp_info_status == WEBP_INFO_OK) ? EXIT_SUCCESS
|
||||
: EXIT_FAILURE);
|
||||
}
|
||||
|
@ -59,6 +59,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webp/decode.h"
|
||||
#include "webp/mux.h"
|
||||
#include "../examples/example_util.h"
|
||||
@ -1225,6 +1226,7 @@ static int Process(const Config* config) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Main.
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
Config config;
|
||||
int ok;
|
||||
@ -1238,7 +1240,7 @@ int main(int argc, const char* argv[]) {
|
||||
PrintHelp();
|
||||
}
|
||||
DeleteConfig(&config);
|
||||
FREE_WARGV_AND_RETURN(!ok);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "webp/types.h"
|
||||
|
||||
#define XTRA_MAJ_VERSION 1
|
||||
#define XTRA_MIN_VERSION 4
|
||||
#define XTRA_MIN_VERSION 5
|
||||
#define XTRA_REV_VERSION 0
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -227,10 +227,11 @@ static void Help(void) {
|
||||
WebPGetEnabledInputFileFormats());
|
||||
}
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
WebPPicture pic1, pic2;
|
||||
size_t size1 = 0, size2 = 0;
|
||||
int ret = 1;
|
||||
int ret = EXIT_FAILURE;
|
||||
float disto[5];
|
||||
int type = 0;
|
||||
int c;
|
||||
@ -246,7 +247,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (!WebPPictureInit(&pic1) || !WebPPictureInit(&pic2)) {
|
||||
fprintf(stderr, "Can't init pictures\n");
|
||||
FREE_WARGV_AND_RETURN(1);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
@ -262,7 +263,7 @@ int main(int argc, const char* argv[]) {
|
||||
use_gray = 1;
|
||||
} else if (!strcmp(argv[c], "-h")) {
|
||||
help = 1;
|
||||
ret = 0;
|
||||
ret = EXIT_SUCCESS;
|
||||
} else if (!strcmp(argv[c], "-o")) {
|
||||
if (++c == argc) {
|
||||
fprintf(stderr, "missing file name after %s option.\n", argv[c - 1]);
|
||||
@ -337,7 +338,8 @@ int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "Error during lossless encoding.\n");
|
||||
goto End;
|
||||
}
|
||||
ret = ImgIoUtilWriteFile(output, data, data_size) ? 0 : 1;
|
||||
ret = ImgIoUtilWriteFile(output, data, data_size) ? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
WebPFree(data);
|
||||
if (ret) goto End;
|
||||
#else
|
||||
@ -345,9 +347,10 @@ int main(int argc, const char* argv[]) {
|
||||
(void)data_size;
|
||||
fprintf(stderr, "Cannot save the difference map. Please recompile "
|
||||
"without the WEBP_REDUCE_CSP flag.\n");
|
||||
goto End;
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
}
|
||||
ret = 0;
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
End:
|
||||
WebPPictureFree(&pic1);
|
||||
|
@ -15,6 +15,7 @@
|
||||
// Author: James Zern (jzern@google.com)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "webp/config.h"
|
||||
@ -49,19 +50,26 @@ static void ProcessEvents(void) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, char* argv[]) {
|
||||
int c;
|
||||
int ok = 0;
|
||||
|
||||
INIT_WARGV(argc, argv);
|
||||
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "Usage: %s [-h] image.webp [more_files.webp...]\n",
|
||||
argv[0]);
|
||||
goto Error;
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
const char* file = NULL;
|
||||
const uint8_t* webp = NULL;
|
||||
size_t webp_size = 0;
|
||||
if (!strcmp(argv[c], "-h")) {
|
||||
printf("Usage: %s [-h] image.webp [more_files.webp...]\n", argv[0]);
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else {
|
||||
file = (const char*)GET_WARGV(argv, c);
|
||||
}
|
||||
@ -87,7 +95,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
Error:
|
||||
SDL_Quit();
|
||||
FREE_WARGV_AND_RETURN(ok ? 0 : 1);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#else // !WEBP_HAVE_SDL
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "imageio/imageio_util.h"
|
||||
#include "../examples/unicode.h"
|
||||
|
||||
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
|
||||
int main(int argc, const char* argv[]) {
|
||||
int c;
|
||||
int quiet = 0;
|
||||
@ -27,7 +28,7 @@ int main(int argc, const char* argv[]) {
|
||||
quiet = 1;
|
||||
} else if (!strcmp(argv[c], "-help") || !strcmp(argv[c], "-h")) {
|
||||
printf("webp_quality [-h][-quiet] webp_files...\n");
|
||||
FREE_WARGV_AND_RETURN(0);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else {
|
||||
const char* const filename = (const char*)GET_WARGV(argv, c);
|
||||
const uint8_t* data = NULL;
|
||||
@ -50,5 +51,5 @@ int main(int argc, const char* argv[]) {
|
||||
free((void*)data);
|
||||
}
|
||||
}
|
||||
FREE_WARGV_AND_RETURN(ok ? 0 : 1);
|
||||
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
@ -89,6 +89,11 @@ int ImgIoUtilReadFile(const char* const file_name,
|
||||
}
|
||||
fseek(in, 0, SEEK_END);
|
||||
file_size = ftell(in);
|
||||
if (file_size == (size_t)-1) {
|
||||
fclose(in);
|
||||
WFPRINTF(stderr, "error getting size of '%s'\n", (const W_CHAR*)file_name);
|
||||
return 0;
|
||||
}
|
||||
fseek(in, 0, SEEK_SET);
|
||||
// we allocate one extra byte for the \0 terminator
|
||||
file_data = (uint8_t*)WebPMalloc(file_size + 1);
|
||||
|
@ -206,8 +206,18 @@ struct my_error_mgr {
|
||||
|
||||
static void my_error_exit(j_common_ptr dinfo) {
|
||||
struct my_error_mgr* myerr = (struct my_error_mgr*)dinfo->err;
|
||||
// The following code is disabled in fuzzing mode because:
|
||||
// - the logs can be flooded due to invalid JPEG files
|
||||
// - msg_code is wrongfully seen as uninitialized by msan when the libjpeg
|
||||
// dependency is not built with sanitizers enabled
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
const int msg_code = myerr->pub.msg_code;
|
||||
fprintf(stderr, "libjpeg error: ");
|
||||
dinfo->err->output_message(dinfo);
|
||||
if (msg_code == JERR_INPUT_EOF || msg_code == JERR_FILE_READ) {
|
||||
fprintf(stderr, "`jpegtran -copy all` MAY be able to process this file.\n");
|
||||
}
|
||||
#endif
|
||||
longjmp(myerr->setjmp_buffer, 1);
|
||||
}
|
||||
|
||||
|
@ -139,6 +139,8 @@ static const struct {
|
||||
{ "Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp) },
|
||||
// Exiftool puts exif data in APP1 chunk, too.
|
||||
{ "Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif) },
|
||||
// ImageMagick uses lowercase app1.
|
||||
{ "Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif) },
|
||||
// XMP Specification Part 3, Section 3 #PNG
|
||||
{ "XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp) },
|
||||
{ NULL, NULL, 0 },
|
||||
@ -159,6 +161,20 @@ static int ExtractMetadataFromPNG(png_structp png,
|
||||
png_textp text = NULL;
|
||||
const png_uint_32 num = png_get_text(png, info, &text, NULL);
|
||||
png_uint_32 i;
|
||||
|
||||
#ifdef PNG_eXIf_SUPPORTED
|
||||
// Look for an 'eXIf' tag. Preference is given to this tag as it's newer
|
||||
// than the TextualData tags.
|
||||
{
|
||||
png_bytep exif;
|
||||
png_uint_32 len;
|
||||
|
||||
if (png_get_eXIf_1(png, info, &len, &exif) == PNG_INFO_eXIf) {
|
||||
if (!MetadataCopy((const char*)exif, len, &metadata->exif)) return 0;
|
||||
}
|
||||
}
|
||||
#endif // PNG_eXIf_SUPPORTED
|
||||
|
||||
// Look for EXIF / XMP metadata.
|
||||
for (i = 0; i < num; ++i, ++text) {
|
||||
int j;
|
||||
@ -192,6 +208,7 @@ static int ExtractMetadataFromPNG(png_structp png,
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef PNG_iCCP_SUPPORTED
|
||||
// Look for an ICC profile.
|
||||
{
|
||||
png_charp name;
|
||||
@ -208,6 +225,7 @@ static int ExtractMetadataFromPNG(png_structp png,
|
||||
if (!MetadataCopy((const char*)profile, len, &metadata->iccp)) return 0;
|
||||
}
|
||||
}
|
||||
#endif // PNG_iCCP_SUPPORTED
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ DEMUXLIBLIST=''
|
||||
if [[ -z "${SDK}" ]]; then
|
||||
echo "iOS SDK not available"
|
||||
exit 1
|
||||
elif [[ ${SDK%%.*} -gt 8 ]]; then
|
||||
elif [[ ${SDK%%.*} -gt 8 && "${XCODE%%.*}" -lt 16 ]]; then
|
||||
EXTRA_CFLAGS="-fembed-bitcode"
|
||||
elif [[ ${SDK%%.*} -le 6 ]]; then
|
||||
echo "You need iOS SDK version 6.0 or above"
|
||||
|
25
man/cwebp.1
25
man/cwebp.1
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH CWEBP 1 "March 26, 2024"
|
||||
.TH CWEBP 1 "September 17, 2024"
|
||||
.SH NAME
|
||||
cwebp \- compress an image file to a WebP file
|
||||
.SH SYNOPSIS
|
||||
@ -180,8 +180,8 @@ Disable strong filtering (if filtering is being used thanks to the
|
||||
\fB\-f\fP option) and use simple filtering instead.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion if needed. Note that this
|
||||
process is slower than the default 'fast' RGB->YUV conversion.
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-sns " int
|
||||
Specify the amplitude of the spatial noise shaping. Spatial noise shaping
|
||||
@ -299,12 +299,12 @@ Note: each input format may not support all combinations.
|
||||
.B \-noasm
|
||||
Disable all assembly optimizations.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBcwebp\fP exits with the value of
|
||||
the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBcwebp\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
cwebp \-q 50 -lossless picture.png \-o picture_lossless.webp
|
||||
@ -324,6 +324,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
|
||||
for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR dwebp (1),
|
||||
.BR gif2webp (1)
|
||||
|
21
man/dwebp.1
21
man/dwebp.1
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH DWEBP 1 "November 17, 2021"
|
||||
.TH DWEBP 1 "July 18, 2024"
|
||||
.SH NAME
|
||||
dwebp \- decompress a WebP file to an image file
|
||||
.SH SYNOPSIS
|
||||
@ -108,12 +108,12 @@ Print extra information (decoding time in particular).
|
||||
.B \-noasm
|
||||
Disable all assembly optimizations.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBdwebp\fP exits with the value of
|
||||
the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBdwebp\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
dwebp picture.webp \-o output.png
|
||||
@ -133,6 +133,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
|
||||
for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR cwebp (1),
|
||||
.BR gif2webp (1),
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH GIF2WEBP 1 "November 17, 2021"
|
||||
.TH GIF2WEBP 1 "November 4, 2024"
|
||||
.SH NAME
|
||||
gif2webp \- Convert a GIF image to WebP
|
||||
.SH SYNOPSIS
|
||||
@ -39,6 +39,18 @@ Encode the image using lossy compression.
|
||||
Mixed compression mode: optimize compression of the image by picking either
|
||||
lossy or lossless compression for each frame heuristically.
|
||||
.TP
|
||||
.BI \-near_lossless " int
|
||||
Specify the level of near\-lossless image preprocessing. This option adjusts
|
||||
pixel values to help compressibility, but has minimal impact on the visual
|
||||
quality. It triggers lossless compression mode automatically. The range is 0
|
||||
(maximum preprocessing) to 100 (no preprocessing, the default). The typical
|
||||
value is around 60. Note that lossy with \fB\-q 100\fP can at times yield
|
||||
better results.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-q " float
|
||||
Specify the compression factor for RGB channels between 0 and 100. The default
|
||||
is 75.
|
||||
@ -126,12 +138,12 @@ Print extra information.
|
||||
.B \-quiet
|
||||
Do not print anything.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBgif2webp\fP exits with the value
|
||||
of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBgif2webp\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
gif2webp picture.gif \-o picture.webp
|
||||
@ -155,6 +167,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Urvang Joshi <urvang@google.com>, for the
|
||||
Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR cwebp (1),
|
||||
.BR dwebp (1),
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH IMG2WEBP 1 "March 17, 2023"
|
||||
.TH IMG2WEBP 1 "November 26, 2024"
|
||||
.SH NAME
|
||||
img2webp \- create animated WebP file from a sequence of input images.
|
||||
.SH SYNOPSIS
|
||||
@ -53,8 +53,8 @@ value is around 60. Note that lossy with \fB\-q 100\fP can at times yield
|
||||
better results.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion if needed. Note that this
|
||||
process is slower than the default 'fast' RGB->YUV conversion.
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-loop " int
|
||||
Specifies the number of times the animation should loop. Using '0'
|
||||
@ -88,18 +88,27 @@ Specify the compression factor between 0 and 100. The default is 75.
|
||||
Specify the compression method to use. This parameter controls the
|
||||
trade off between encoding speed and the compressed file size and quality.
|
||||
Possible values range from 0 to 6. Default value is 4.
|
||||
When higher values are used, the encoder will spend more time inspecting
|
||||
additional encoding possibilities and decide on the quality gain.
|
||||
Lower value can result in faster processing time at the expense of
|
||||
larger file size and lower compression quality.
|
||||
.TP
|
||||
.B \-exact, \-noexact
|
||||
Preserve or alter RGB values in transparent area. The default is
|
||||
\fB-noexact\fP, to help compressibility. Note \fB\-noexact\fP may cause
|
||||
artifacts in frames compressed with \fB\-lossy\fP.
|
||||
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBimg2webp\fP exits with the value
|
||||
of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBimg2webp\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLE
|
||||
img2webp -loop 2 in0.png -lossy in1.jpg -d 80 in2.tiff -o out.webp
|
||||
.br
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH AUTHORS
|
||||
\fBimg2webp\fP is a part of libwebp and was written by the WebP team.
|
||||
.br
|
||||
@ -109,6 +118,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Pascal Massimino <pascal.massimino@gmail.com>,
|
||||
for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR webpmux (1),
|
||||
.BR gif2webp (1)
|
||||
|
21
man/vwebp.1
21
man/vwebp.1
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH VWEBP 1 "November 17, 2021"
|
||||
.TH VWEBP 1 "July 18, 2024"
|
||||
.SH NAME
|
||||
vwebp \- decompress a WebP file and display it in a window
|
||||
.SH SYNOPSIS
|
||||
@ -72,12 +72,12 @@ Disable blending and disposal process, for debugging purposes.
|
||||
.B 'q' / 'Q' / ESC
|
||||
Quit.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBvwebp\fP exits with the value of
|
||||
the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBvwebp\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
vwebp picture.webp
|
||||
@ -94,6 +94,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
.PP
|
||||
This manual page was written for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR dwebp (1)
|
||||
.br
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH WEBPINFO 1 "November 17, 2021"
|
||||
.TH WEBPINFO 1 "July 18, 2024"
|
||||
.SH NAME
|
||||
webpinfo \- print out the chunk level structure of WebP files
|
||||
along with basic integrity checks.
|
||||
@ -47,12 +47,12 @@ Detailed usage instructions.
|
||||
Input files in WebP format. Input files must come last, following
|
||||
options (if any). There can be multiple input files.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBwebpinfo\fP exits with the value
|
||||
of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBwebpinfo\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
.br
|
||||
@ -73,6 +73,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Hui Su <huisu@google.com>,
|
||||
for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR webpmux (1)
|
||||
.br
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH WEBPMUX 1 "November 17, 2021"
|
||||
.TH WEBPMUX 1 "July 18, 2024"
|
||||
.SH NAME
|
||||
webpmux \- create animated WebP files from non\-animated WebP images, extract
|
||||
frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
|
||||
@ -186,12 +186,12 @@ Output file in WebP format.
|
||||
.TP
|
||||
The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.
|
||||
|
||||
.SH BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://bugs.chromium.org/p/webp
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBwebpmux\fP exits with the value
|
||||
of the C constant \fBEXIT_SUCCESS\fP. This is usually zero.
|
||||
.PP
|
||||
If an error occurs, \fBwebpmux\fP exits with the value of the C constant
|
||||
\fBEXIT_FAILURE\fP. This is usually one.
|
||||
|
||||
.SH EXAMPLES
|
||||
.P
|
||||
@ -262,6 +262,13 @@ https://chromium.googlesource.com/webm/libwebp
|
||||
This manual page was written by Vikas Arora <vikaas.arora@gmail.com>,
|
||||
for the Debian project (and may be used by others).
|
||||
|
||||
.SH REPORTING BUGS
|
||||
Please report all bugs to the issue tracker:
|
||||
https://issues.webmproject.org
|
||||
.br
|
||||
Patches welcome! See this page to get started:
|
||||
https://www.webmproject.org/code/contribute/submitting\-patches/
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR cwebp (1),
|
||||
.BR dwebp (1),
|
||||
|
@ -33,7 +33,7 @@ libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
|
||||
|
||||
libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
libsharpyuv_la_LDFLAGS = -no-undefined -version-info 1:0:1 -lm
|
||||
libsharpyuv_la_LDFLAGS = -no-undefined -version-info 1:1:1 -lm
|
||||
libsharpyuv_la_LIBADD =
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_sse2.la
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_neon.la
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 0,0,4,0
|
||||
PRODUCTVERSION 0,0,4,0
|
||||
FILEVERSION 0,0,4,1
|
||||
PRODUCTVERSION 0,0,4,1
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libsharpyuv DLL"
|
||||
VALUE "FileVersion", "0.4.0"
|
||||
VALUE "FileVersion", "0.4.1"
|
||||
VALUE "InternalName", "libsharpyuv.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2024"
|
||||
VALUE "OriginalFilename", "libsharpyuv.dll"
|
||||
VALUE "ProductName", "SharpYuv Library"
|
||||
VALUE "ProductVersion", "0.4.0"
|
||||
VALUE "ProductVersion", "0.4.1"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -565,10 +565,11 @@ int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
|
||||
scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
|
||||
scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
|
||||
|
||||
return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
|
||||
rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
|
||||
v_ptr, v_stride, yuv_bit_depth, width, height,
|
||||
&scaled_matrix, transfer_type);
|
||||
return DoSharpArgbToYuv(
|
||||
(const uint8_t*)r_ptr, (const uint8_t*)g_ptr, (const uint8_t*)b_ptr,
|
||||
rgb_step, rgb_stride, rgb_bit_depth, (uint8_t*)y_ptr, y_stride,
|
||||
(uint8_t*)u_ptr, u_stride, (uint8_t*)v_ptr, v_stride, yuv_bit_depth,
|
||||
width, height, &scaled_matrix, transfer_type);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -52,7 +52,7 @@ extern "C" {
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 4
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
#define SHARPYUV_VERSION_PATCH 1
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
@ -66,10 +66,17 @@ extern "C" {
|
||||
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
// y_ = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u_ = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v_ = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then the values are divided by 1<<16 and rounded.
|
||||
// y = (y_ + (1 << 15)) >> 16
|
||||
// u = (u_ + (1 << 15)) >> 16
|
||||
// v = (v_ + (1 << 15)) >> 16
|
||||
//
|
||||
// Typically, the offset values rgb_to_y[3], rgb_to_u[3] and rgb_to_v[3] depend
|
||||
// on the input's bit depth, e.g., rgb_to_u[3] = 1 << (rgb_bit_depth - 1 + 16).
|
||||
// See also sharpyuv_csp.h to get a predefined matrix or generate a matrix.
|
||||
typedef struct {
|
||||
int rgb_to_y[4];
|
||||
int rgb_to_u[4];
|
||||
@ -127,6 +134,8 @@ typedef enum SharpYuvTransferFunctionType {
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
// yuv_matrix: RGB to YUV conversion matrix. The matrix values typically
|
||||
// depend on the input's rgb_bit_depth.
|
||||
// This function calls SharpYuvConvertWithOptions with a default transfer
|
||||
// function of kSharpYuvTransferFunctionSrgb.
|
||||
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
|
@ -22,16 +22,16 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
const float cb = 0.5f / (1.0f - kb);
|
||||
const float cr = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float scale_u = cb;
|
||||
float scale_v = cr;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
@ -59,31 +59,35 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
}
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix
|
||||
// Derived using the following formulas:
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
|
@ -41,10 +41,15 @@ SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
// WebP's matrix, similar but not identical to kSharpYuvMatrixRec601Limited
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec601Full,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
@ -36,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
|
||||
# other than the ones listed on the command line, i.e., after linking, it will
|
||||
# not have unresolved symbols. Some platforms (Windows among them) require all
|
||||
# symbols in shared libraries to be resolved at library creation.
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 8:9:1
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 8:10:1
|
||||
libwebpincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebp.pc
|
||||
|
||||
@ -48,7 +48,7 @@ if BUILD_LIBWEBPDECODER
|
||||
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
|
||||
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
|
||||
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:9:1
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:10:1
|
||||
pkgconfig_DATA += libwebpdecoder.pc
|
||||
endif
|
||||
|
||||
|
@ -26,10 +26,9 @@ static const uint8_t kModeBpp[MODE_LAST] = {
|
||||
4, 4, 4, 2, // pre-multiplied modes
|
||||
1, 1 };
|
||||
|
||||
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
|
||||
// Convert to an integer to handle both the unsigned/signed enum cases
|
||||
// without the need for casting to remove type limit warnings.
|
||||
static int IsValidColorspace(int webp_csp_mode) {
|
||||
int IsValidColorspace(int webp_csp_mode) {
|
||||
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
|
||||
}
|
||||
|
||||
|
@ -51,4 +51,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
||||
NUM_PROBAS = 11
|
||||
};
|
||||
|
||||
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
|
||||
int IsValidColorspace(int webp_csp_mode);
|
||||
|
||||
#endif // WEBP_DEC_COMMON_DEC_H_
|
||||
|
@ -12,7 +12,9 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
@ -25,9 +27,9 @@
|
||||
static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
|
||||
WebPDecBuffer* output = p->output;
|
||||
const WebPYUVABuffer* const buf = &output->u.YUVA;
|
||||
uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride;
|
||||
uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride;
|
||||
uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride;
|
||||
uint8_t* const y_dst = buf->y + (ptrdiff_t)io->mb_y * buf->y_stride;
|
||||
uint8_t* const u_dst = buf->u + (ptrdiff_t)(io->mb_y >> 1) * buf->u_stride;
|
||||
uint8_t* const v_dst = buf->v + (ptrdiff_t)(io->mb_y >> 1) * buf->v_stride;
|
||||
const int mb_w = io->mb_w;
|
||||
const int mb_h = io->mb_h;
|
||||
const int uv_w = (mb_w + 1) / 2;
|
||||
@ -42,7 +44,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
|
||||
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||
WebPDecBuffer* const output = p->output;
|
||||
WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride;
|
||||
uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
|
||||
WebPSamplerProcessPlane(io->y, io->y_stride,
|
||||
io->u, io->v, io->uv_stride,
|
||||
dst, buf->stride, io->mb_w, io->mb_h,
|
||||
@ -57,7 +59,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||
static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||
int num_lines_out = io->mb_h; // a priori guess
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride;
|
||||
uint8_t* dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
|
||||
WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
|
||||
const uint8_t* cur_y = io->y;
|
||||
const uint8_t* cur_u = io->u;
|
||||
@ -128,7 +130,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
|
||||
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
|
||||
const int mb_w = io->mb_w;
|
||||
const int mb_h = io->mb_h;
|
||||
uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride;
|
||||
uint8_t* dst = buf->a + (ptrdiff_t)io->mb_y * buf->a_stride;
|
||||
int j;
|
||||
(void)expected_num_lines_out;
|
||||
assert(expected_num_lines_out == mb_h);
|
||||
@ -181,8 +183,8 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
|
||||
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
int num_rows;
|
||||
const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
|
||||
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
|
||||
uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
|
||||
const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
|
||||
num_rows, dst, buf->stride);
|
||||
@ -205,8 +207,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
|
||||
const WEBP_CSP_MODE colorspace = p->output->colorspace;
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
int num_rows;
|
||||
const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
|
||||
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
|
||||
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
@ -271,9 +273,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
|
||||
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
|
||||
int expected_num_lines_out) {
|
||||
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
|
||||
uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride;
|
||||
uint8_t* const dst_a = buf->a + (ptrdiff_t)p->last_y * buf->a_stride;
|
||||
if (io->a != NULL) {
|
||||
uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride;
|
||||
uint8_t* const dst_y = buf->y + (ptrdiff_t)p->last_y * buf->y_stride;
|
||||
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
|
||||
assert(expected_num_lines_out == num_lines_out);
|
||||
if (num_lines_out > 0) { // unmultiply the Y
|
||||
@ -362,7 +364,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
|
||||
const WebPYUV444Converter convert =
|
||||
WebPYUV444Converters[p->output->colorspace];
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride;
|
||||
uint8_t* dst = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
|
||||
int num_lines_out = 0;
|
||||
// For RGB rescaling, because of the YUV420, current scan position
|
||||
// U/V can be +1/-1 line from the Y one. Hence the double test.
|
||||
@ -389,14 +391,14 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||
while (j < mb_h) {
|
||||
const int y_lines_in =
|
||||
WebPRescalerImport(p->scaler_y, mb_h - j,
|
||||
io->y + (size_t)j * io->y_stride, io->y_stride);
|
||||
io->y + (ptrdiff_t)j * io->y_stride, io->y_stride);
|
||||
j += y_lines_in;
|
||||
if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) {
|
||||
const int u_lines_in = WebPRescalerImport(
|
||||
p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride,
|
||||
p->scaler_u, uv_mb_h - uv_j, io->u + (ptrdiff_t)uv_j * io->uv_stride,
|
||||
io->uv_stride);
|
||||
const int v_lines_in = WebPRescalerImport(
|
||||
p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride,
|
||||
p->scaler_v, uv_mb_h - uv_j, io->v + (ptrdiff_t)uv_j * io->uv_stride,
|
||||
io->uv_stride);
|
||||
(void)v_lines_in; // remove a gcc warning
|
||||
assert(u_lines_in == v_lines_in);
|
||||
@ -409,7 +411,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||
|
||||
static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
|
||||
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
|
||||
const WEBP_CSP_MODE colorspace = p->output->colorspace;
|
||||
const int alpha_first =
|
||||
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
|
||||
@ -437,7 +439,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
|
||||
static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
|
||||
int max_lines_out) {
|
||||
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
|
||||
uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
|
||||
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
uint8_t* alpha_dst = base_rgba;
|
||||
#else
|
||||
@ -476,7 +478,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
|
||||
int lines_left = expected_num_out_lines;
|
||||
const int y_end = p->last_y + lines_left;
|
||||
while (lines_left > 0) {
|
||||
const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y;
|
||||
const int64_t row_offset = (ptrdiff_t)scaler->src_y - io->mb_y;
|
||||
WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
|
||||
io->a + row_offset * io->width, io->width);
|
||||
lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);
|
||||
|
@ -16,7 +16,8 @@
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
|
||||
!defined(__wasm__)
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
|
@ -32,7 +32,7 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 4
|
||||
#define DEC_MIN_VERSION 5
|
||||
#define DEC_REV_VERSION 0
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
|
@ -13,6 +13,7 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/alphai_dec.h"
|
||||
@ -20,10 +21,9 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define NUM_ARGB_CACHE_ROWS 16
|
||||
|
||||
@ -381,7 +381,8 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
|
||||
if (allow_recursion && VP8LReadBits(br, 1)) {
|
||||
// use meta Huffman codes.
|
||||
const int huffman_precision = VP8LReadBits(br, 3) + 2;
|
||||
const int huffman_precision =
|
||||
MIN_HUFFMAN_BITS + VP8LReadBits(br, NUM_HUFFMAN_BITS);
|
||||
const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
|
||||
const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
|
||||
const int huffman_pixs = huffman_xsize * huffman_ysize;
|
||||
@ -624,8 +625,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
|
||||
int num_lines_in = 0;
|
||||
int num_lines_out = 0;
|
||||
while (num_lines_in < mb_h) {
|
||||
uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride;
|
||||
uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride;
|
||||
uint8_t* const row_in = in + (ptrdiff_t)num_lines_in * in_stride;
|
||||
uint8_t* const row_out = out + (ptrdiff_t)num_lines_out * out_stride;
|
||||
const int lines_left = mb_h - num_lines_in;
|
||||
const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
|
||||
int lines_imported;
|
||||
@ -827,7 +828,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
|
||||
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
|
||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const rgba =
|
||||
buf->rgba + (int64_t)dec->last_out_row_ * buf->stride;
|
||||
buf->rgba + (ptrdiff_t)dec->last_out_row_ * buf->stride;
|
||||
const int num_rows_out =
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
io->use_scaling ?
|
||||
@ -1351,7 +1352,8 @@ static int ReadTransform(int* const xsize, int const* ysize,
|
||||
switch (type) {
|
||||
case PREDICTOR_TRANSFORM:
|
||||
case CROSS_COLOR_TRANSFORM:
|
||||
transform->bits_ = VP8LReadBits(br, 3) + 2;
|
||||
transform->bits_ =
|
||||
MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
|
||||
ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
|
||||
transform->bits_),
|
||||
VP8LSubSampleSize(transform->ysize_,
|
||||
@ -1416,7 +1418,9 @@ VP8LDecoder* VP8LNew(void) {
|
||||
return dec;
|
||||
}
|
||||
|
||||
void VP8LClear(VP8LDecoder* const dec) {
|
||||
// Resets the decoder in its initial state, reclaiming memory.
|
||||
// Preserves the dec->status_ value.
|
||||
static void VP8LClear(VP8LDecoder* const dec) {
|
||||
int i;
|
||||
if (dec == NULL) return;
|
||||
ClearMetadata(&dec->hdr_);
|
||||
|
@ -121,10 +121,6 @@ WEBP_NODISCARD int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
|
||||
// this function. Returns false in case of error, with updated dec->status_.
|
||||
WEBP_NODISCARD int VP8LDecodeImage(VP8LDecoder* const dec);
|
||||
|
||||
// Resets the decoder in its initial state, reclaiming memory.
|
||||
// Preserves the dec->status_ value.
|
||||
void VP8LClear(VP8LDecoder* const dec);
|
||||
|
||||
// Clears and deallocate a lossless decoder instance.
|
||||
void VP8LDelete(VP8LDecoder* const dec);
|
||||
|
||||
|
@ -13,13 +13,15 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/common_dec.h"
|
||||
#include "src/dec/vp8_dec.h"
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/dec/webpi_dec.h"
|
||||
#include "src/utils/rescaler_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/mux_types.h" // ALPHA_FLAG
|
||||
#include "src/webp/decode.h"
|
||||
#include "src/webp/mux_types.h" // ALPHA_FLAG
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -747,6 +749,61 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int WebPCheckCropDimensionsBasic(int x, int y, int w, int h) {
|
||||
return !(x < 0 || y < 0 || w <= 0 || h <= 0);
|
||||
}
|
||||
|
||||
int WebPValidateDecoderConfig(const WebPDecoderConfig* config) {
|
||||
const WebPDecoderOptions* options;
|
||||
if (config == NULL) return 0;
|
||||
if (!IsValidColorspace(config->output.colorspace)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
options = &config->options;
|
||||
// bypass_filtering, no_fancy_upsampling, use_cropping, use_scaling,
|
||||
// use_threads, flip can be any integer and are interpreted as boolean.
|
||||
|
||||
// Check for cropping.
|
||||
if (options->use_cropping && !WebPCheckCropDimensionsBasic(
|
||||
options->crop_left, options->crop_top,
|
||||
options->crop_width, options->crop_height)) {
|
||||
return 0;
|
||||
}
|
||||
// Check for scaling.
|
||||
if (options->use_scaling &&
|
||||
(options->scaled_width < 0 || options->scaled_height < 0 ||
|
||||
(options->scaled_width == 0 && options->scaled_height == 0))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// In case the WebPBitstreamFeatures has been filled in, check further.
|
||||
if (config->input.width > 0 || config->input.height > 0) {
|
||||
int scaled_width = options->scaled_width;
|
||||
int scaled_height = options->scaled_height;
|
||||
if (options->use_cropping &&
|
||||
!WebPCheckCropDimensions(config->input.width, config->input.height,
|
||||
options->crop_left, options->crop_top,
|
||||
options->crop_width, options->crop_height)) {
|
||||
return 0;
|
||||
}
|
||||
if (options->use_scaling && !WebPRescalerGetScaledDimensions(
|
||||
config->input.width, config->input.height,
|
||||
&scaled_width, &scaled_height)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for dithering.
|
||||
if (options->dithering_strength < 0 || options->dithering_strength > 100 ||
|
||||
options->alpha_dithering_strength < 0 ||
|
||||
options->alpha_dithering_strength > 100) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
|
||||
WebPBitstreamFeatures* features,
|
||||
int version) {
|
||||
@ -806,8 +863,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
|
||||
|
||||
int WebPCheckCropDimensions(int image_width, int image_height,
|
||||
int x, int y, int w, int h) {
|
||||
return !(x < 0 || y < 0 || w <= 0 || h <= 0 ||
|
||||
x >= image_width || w > image_width || w > image_width - x ||
|
||||
return WebPCheckCropDimensionsBasic(x, y, w, h) &&
|
||||
!(x >= image_width || w > image_width || w > image_width - x ||
|
||||
y >= image_height || h > image_height || h > image_height - y);
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,6 @@ noinst_HEADERS =
|
||||
noinst_HEADERS += ../webp/format_constants.h
|
||||
|
||||
libwebpdemux_la_LIBADD = ../libwebp.la
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:15:0
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:16:0
|
||||
libwebpdemuxincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebpdemux.pc
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 4
|
||||
#define DMUX_MIN_VERSION 5
|
||||
#define DMUX_REV_VERSION 0
|
||||
|
||||
typedef struct {
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 1,0,4,0
|
||||
PRODUCTVERSION 1,0,4,0
|
||||
FILEVERSION 1,0,5,0
|
||||
PRODUCTVERSION 1,0,5,0
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libwebpdemux DLL"
|
||||
VALUE "FileVersion", "1.4.0"
|
||||
VALUE "FileVersion", "1.5.0"
|
||||
VALUE "InternalName", "libwebpdemux.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2024"
|
||||
VALUE "OriginalFilename", "libwebpdemux.dll"
|
||||
VALUE "ProductName", "WebP Image Demuxer"
|
||||
VALUE "ProductVersion", "1.4.0"
|
||||
VALUE "ProductVersion", "1.5.0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -5,6 +5,8 @@ noinst_LTLIBRARIES += libwebpdsp_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_avx2.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdspdecode_neon.la
|
||||
noinst_LTLIBRARIES += libwebpdsp_msa.la
|
||||
@ -44,6 +46,11 @@ ENC_SOURCES += lossless_enc.c
|
||||
ENC_SOURCES += quant.h
|
||||
ENC_SOURCES += ssim.c
|
||||
|
||||
libwebpdspdecode_avx2_la_SOURCES =
|
||||
libwebpdspdecode_avx2_la_SOURCES += lossless_avx2.c
|
||||
libwebpdspdecode_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
|
||||
libwebpdspdecode_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
|
||||
|
||||
libwebpdspdecode_sse41_la_SOURCES =
|
||||
libwebpdspdecode_sse41_la_SOURCES += alpha_processing_sse41.c
|
||||
libwebpdspdecode_sse41_la_SOURCES += dec_sse41.c
|
||||
@ -123,6 +130,12 @@ libwebpdsp_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
|
||||
libwebpdsp_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
|
||||
libwebpdsp_sse41_la_LIBADD = libwebpdspdecode_sse41.la
|
||||
|
||||
libwebpdsp_avx2_la_SOURCES =
|
||||
libwebpdsp_avx2_la_SOURCES += lossless_enc_avx2.c
|
||||
libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
|
||||
libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
|
||||
libwebpdsp_avx2_la_LIBADD = libwebpdspdecode_avx2.la
|
||||
|
||||
libwebpdsp_neon_la_SOURCES =
|
||||
libwebpdsp_neon_la_SOURCES += cost_neon.c
|
||||
libwebpdsp_neon_la_SOURCES += enc_neon.c
|
||||
@ -167,6 +180,7 @@ libwebpdsp_la_LDFLAGS = -lm
|
||||
libwebpdsp_la_LIBADD =
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_msa.la
|
||||
libwebpdsp_la_LIBADD += libwebpdsp_mips32.la
|
||||
@ -180,6 +194,7 @@ if BUILD_LIBWEBPDECODER
|
||||
libwebpdspdecode_la_LIBADD =
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_avx2.la
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_neon.la
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_msa.la
|
||||
libwebpdspdecode_la_LIBADD += libwebpdspdecode_mips32.la
|
||||
|
@ -16,6 +16,8 @@
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
@ -26,38 +28,44 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
uint32_t alpha_and = 0xff;
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB
|
||||
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
|
||||
__m128i all_alphas = all_0xff;
|
||||
const __m128i alpha_mask = _mm_set1_epi32((int)0xff); // to preserve A
|
||||
const __m128i all_0xff = _mm_set1_epi8(0xff);
|
||||
__m128i all_alphas16 = all_0xff;
|
||||
__m128i all_alphas8 = all_0xff;
|
||||
|
||||
// We must be able to access 3 extra bytes after the last written byte
|
||||
// 'dst[4 * width - 4]', because we don't know if alpha is the first or the
|
||||
// last byte of the quadruplet.
|
||||
const int limit = (width - 1) & ~7;
|
||||
|
||||
for (j = 0; j < height; ++j) {
|
||||
__m128i* out = (__m128i*)dst;
|
||||
for (i = 0; i < limit; i += 8) {
|
||||
char* ptr = (char*)dst;
|
||||
for (i = 0; i + 16 <= width - 1; i += 16) {
|
||||
// load 16 alpha bytes
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
|
||||
const __m128i a1_lo = _mm_unpacklo_epi8(a0, zero);
|
||||
const __m128i a1_hi = _mm_unpackhi_epi8(a0, zero);
|
||||
const __m128i a2_lo_lo = _mm_unpacklo_epi16(a1_lo, zero);
|
||||
const __m128i a2_lo_hi = _mm_unpackhi_epi16(a1_lo, zero);
|
||||
const __m128i a2_hi_lo = _mm_unpacklo_epi16(a1_hi, zero);
|
||||
const __m128i a2_hi_hi = _mm_unpackhi_epi16(a1_hi, zero);
|
||||
_mm_maskmoveu_si128(a2_lo_lo, alpha_mask, ptr + 0);
|
||||
_mm_maskmoveu_si128(a2_lo_hi, alpha_mask, ptr + 16);
|
||||
_mm_maskmoveu_si128(a2_hi_lo, alpha_mask, ptr + 32);
|
||||
_mm_maskmoveu_si128(a2_hi_hi, alpha_mask, ptr + 48);
|
||||
// accumulate 16 alpha 'and' in parallel
|
||||
all_alphas16 = _mm_and_si128(all_alphas16, a0);
|
||||
ptr += 64;
|
||||
}
|
||||
if (i + 8 <= width - 1) {
|
||||
// load 8 alpha bytes
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
|
||||
const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
|
||||
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
|
||||
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
|
||||
// load 8 dst pixels (32 bytes)
|
||||
const __m128i b0_lo = _mm_loadu_si128(out + 0);
|
||||
const __m128i b0_hi = _mm_loadu_si128(out + 1);
|
||||
// mask dst alpha values
|
||||
const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
|
||||
const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
|
||||
// combine
|
||||
const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
|
||||
const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
|
||||
// store
|
||||
_mm_storeu_si128(out + 0, b2_lo);
|
||||
_mm_storeu_si128(out + 1, b2_hi);
|
||||
// accumulate eight alpha 'and' in parallel
|
||||
all_alphas = _mm_and_si128(all_alphas, a0);
|
||||
out += 2;
|
||||
_mm_maskmoveu_si128(a2_lo, alpha_mask, ptr);
|
||||
_mm_maskmoveu_si128(a2_hi, alpha_mask, ptr + 16);
|
||||
// accumulate 8 alpha 'and' in parallel
|
||||
all_alphas8 = _mm_and_si128(all_alphas8, a0);
|
||||
i += 8;
|
||||
}
|
||||
for (; i < width; ++i) {
|
||||
const uint32_t alpha_value = alpha[i];
|
||||
@ -68,8 +76,9 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
dst += dst_stride;
|
||||
}
|
||||
// Combine the eight alpha 'and' into a 8-bit mask.
|
||||
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
|
||||
return (alpha_and != 0xff);
|
||||
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas8, all_0xff)) & 0xff;
|
||||
return (alpha_and != 0xff ||
|
||||
_mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas16, all_0xff)) != 0xffff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
|
@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_C(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -19,8 +19,8 @@
|
||||
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16 };
|
||||
|
||||
static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16x8_t minus_one = vdupq_n_s16(-1);
|
||||
const int16x8_t coeffs_0 = vld1q_s16(coeffs);
|
||||
const int16x8_t coeffs_1 = vld1q_s16(coeffs + 8);
|
||||
|
@ -22,8 +22,8 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
|
@ -56,6 +56,11 @@
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1700 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_AVX2 // Visual C++ AVX2 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
@ -80,6 +85,16 @@
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#if (defined(__AVX2__) || defined(WEBP_MSC_AVX2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_AVX2))
|
||||
#define WEBP_USE_AVX2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_AVX2) && !defined(WEBP_HAVE_AVX2)
|
||||
#define WEBP_HAVE_AVX2
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_AVX2
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
|
@ -38,7 +38,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -82,7 +83,8 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -95,7 +97,8 @@ static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
@ -103,13 +106,15 @@ static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -120,7 +125,8 @@ static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -133,7 +139,8 @@ static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
// Paragraph 14.3
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -161,7 +168,7 @@ static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
VP8WHT VP8TransformWHT;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Intra predictions
|
||||
@ -661,32 +668,32 @@ static void HFilter16i_C(uint8_t* p, int stride,
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -694,8 +701,8 @@ static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -730,8 +737,8 @@ VP8SimpleFilterFunc VP8SimpleHFilter16;
|
||||
VP8SimpleFilterFunc VP8SimpleVFilter16i;
|
||||
VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
@ -133,26 +133,26 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -215,7 +215,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14;
|
||||
@ -532,7 +533,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
|
@ -21,7 +21,8 @@
|
||||
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
|
||||
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
|
||||
|
||||
__asm__ volatile (
|
||||
@ -45,7 +46,8 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -81,7 +83,8 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -148,7 +151,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
@ -434,14 +438,14 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -465,14 +469,14 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
@ -38,7 +38,8 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -65,14 +66,16 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
v8i16 input0, input1;
|
||||
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
|
||||
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
@ -114,13 +117,15 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
out[240] = __msa_copy_s_h(out1, 7);
|
||||
}
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int DC = (in[0] + 4) >> 3;
|
||||
const v8i16 tmp0 = __msa_fill_h(DC);
|
||||
ADDBLK_ST4x4_UB(tmp0, tmp0, tmp0, tmp0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -475,8 +480,8 @@ static void HFilter16i(uint8_t* src_y, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variants, for chroma filtering
|
||||
static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4 * stride;
|
||||
uint8_t* ptmp_src_v = src_v - 4 * stride;
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
@ -520,8 +525,8 @@ static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
SD(q2_d, ptmp_src_v);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4;
|
||||
uint8_t* ptmp_src_v = src_v - 4;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -556,7 +561,8 @@ static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
ST6x4_UB(tmp7, 0, tmp5, 4, ptmp_src_v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -587,7 +593,8 @@ static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_v, -stride);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
|
@ -916,8 +916,8 @@ static void HFilter16i_NEON(uint8_t* p, int stride,
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -932,7 +932,8 @@ static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
|
||||
}
|
||||
}
|
||||
static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4 * stride;
|
||||
@ -949,8 +950,8 @@ static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
}
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -964,7 +965,8 @@ static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4;
|
||||
@ -1041,7 +1043,8 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -1051,7 +1054,8 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
|
||||
#else
|
||||
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int kBPS = BPS;
|
||||
// kC1, kC2. Padded because vld1.16 loads 8 bytes
|
||||
const int16_t constants[4] = { kC1, kC2, 0, 0 };
|
||||
@ -1184,14 +1188,16 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void TransformTwo_NEON(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne_NEON(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_NEON(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int16x8_t DC = vdupq_n_s16(in[0]);
|
||||
Add4x4_NEON(DC, DC, dst);
|
||||
}
|
||||
@ -1205,7 +1211,8 @@ static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
|
||||
*dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
|
||||
} while (0)
|
||||
|
||||
static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int32x4x4_t tmp;
|
||||
|
||||
{
|
||||
@ -1256,7 +1263,8 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int16x4_t A = vld1_dup_s16(in);
|
||||
const int16x4_t c4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
@ -1300,18 +1308,19 @@ static void DC4_NEON(uint8_t* dst) { // DC
|
||||
static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
|
||||
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
|
||||
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
|
||||
const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1]
|
||||
const uint16x8_t d = vsubl_u8(T, TL); // A[c] - A[-1]
|
||||
int y;
|
||||
for (y = 0; y < size; y += 4) {
|
||||
// left edge
|
||||
const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t r0 = vaddq_s16(L0, d); // L[r] + A[c] - A[-1]
|
||||
const int16x8_t r1 = vaddq_s16(L1, d);
|
||||
const int16x8_t r2 = vaddq_s16(L2, d);
|
||||
const int16x8_t r3 = vaddq_s16(L3, d);
|
||||
const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
|
||||
const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
|
||||
const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
|
||||
const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
|
||||
// L[r] + A[c] - A[-1]
|
||||
const int16x8_t r0 = vreinterpretq_s16_u16(vaddw_u8(d, L0));
|
||||
const int16x8_t r1 = vreinterpretq_s16_u16(vaddw_u8(d, L1));
|
||||
const int16x8_t r2 = vreinterpretq_s16_u16(vaddw_u8(d, L2));
|
||||
const int16x8_t r3 = vreinterpretq_s16_u16(vaddw_u8(d, L3));
|
||||
// Saturate and store the result.
|
||||
const uint32x2_t r0_u32 = vreinterpret_u32_u8(vqmovun_s16(r0));
|
||||
const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
|
||||
@ -1572,23 +1581,24 @@ static void TM16_NEON(uint8_t* dst) {
|
||||
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
|
||||
const uint8x16_t T = vld1q_u8(dst - BPS); // top row 'A[0..15]'
|
||||
// A[c] - A[-1]
|
||||
const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL));
|
||||
const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL));
|
||||
const uint16x8_t d_lo = vsubl_u8(vget_low_u8(T), TL);
|
||||
const uint16x8_t d_hi = vsubl_u8(vget_high_u8(T), TL);
|
||||
int y;
|
||||
for (y = 0; y < 16; y += 4) {
|
||||
// left edge
|
||||
const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t r0_lo = vaddq_s16(L0, d_lo); // L[r] + A[c] - A[-1]
|
||||
const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
|
||||
const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
|
||||
const int16x8_t r3_lo = vaddq_s16(L3, d_lo);
|
||||
const int16x8_t r0_hi = vaddq_s16(L0, d_hi);
|
||||
const int16x8_t r1_hi = vaddq_s16(L1, d_hi);
|
||||
const int16x8_t r2_hi = vaddq_s16(L2, d_hi);
|
||||
const int16x8_t r3_hi = vaddq_s16(L3, d_hi);
|
||||
const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
|
||||
const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
|
||||
const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
|
||||
const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
|
||||
// L[r] + A[c] - A[-1]
|
||||
const int16x8_t r0_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L0));
|
||||
const int16x8_t r1_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L1));
|
||||
const int16x8_t r2_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L2));
|
||||
const int16x8_t r3_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L3));
|
||||
const int16x8_t r0_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L0));
|
||||
const int16x8_t r1_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L1));
|
||||
const int16x8_t r2_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L2));
|
||||
const int16x8_t r3_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L3));
|
||||
// Saturate and store the result.
|
||||
const uint8x16_t row0 = vcombine_u8(vqmovun_s16(r0_lo), vqmovun_s16(r0_hi));
|
||||
const uint8x16_t row1 = vcombine_u8(vqmovun_s16(r1_lo), vqmovun_s16(r1_hi));
|
||||
|
@ -30,7 +30,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -197,7 +198,8 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i A = _mm_set1_epi16(in[0] + 4);
|
||||
const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
@ -792,8 +794,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@ -817,8 +819,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
@ -837,7 +839,8 @@ static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
@ -863,7 +866,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
153
src/dsp/dsp.h
153
src/dsp/dsp.h
@ -60,53 +60,66 @@ extern "C" {
|
||||
// Transforms
|
||||
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
|
||||
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
|
||||
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
|
||||
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
|
||||
typedef void (*VP8Idct)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
extern VP8Idct VP8ITransform;
|
||||
extern VP8Fdct VP8FTransform;
|
||||
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
|
||||
extern VP8WHT VP8FTransformWHT;
|
||||
// Predictions
|
||||
// *dst is the destination block. *top and *left can be NULL.
|
||||
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
|
||||
typedef void (*VP8IntraPreds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
extern VP8Intra4Preds VP8EncPredLuma4;
|
||||
extern VP8IntraPreds VP8EncPredLuma16;
|
||||
extern VP8IntraPreds VP8EncPredChroma8;
|
||||
|
||||
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
|
||||
typedef int (*VP8Metric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref);
|
||||
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
|
||||
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
|
||||
const uint16_t* const weights);
|
||||
typedef int (*VP8WMetric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint16_t* WEBP_RESTRICT const weights);
|
||||
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
|
||||
// 4 by 4 symmetric matrix.
|
||||
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
|
||||
|
||||
// Compute the average (DC) of four 4x4 blocks.
|
||||
// Each sub-4x4 block #i sum is stored in dc[i].
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
|
||||
uint32_t dc[4]);
|
||||
extern VP8MeanMetric VP8Mean16x4;
|
||||
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
extern VP8BlockCopy VP8Copy4x4;
|
||||
extern VP8BlockCopy VP8Copy16x8;
|
||||
// Quantization
|
||||
struct VP8Matrix; // forward declaration
|
||||
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8QuantizeBlock)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
|
||||
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8Quantize2Blocks)(
|
||||
int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
|
||||
extern VP8QuantizeBlock VP8EncQuantizeBlock;
|
||||
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
|
||||
|
||||
// specific to 2nd transform:
|
||||
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8QuantizeBlockWHT)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
||||
|
||||
extern const int VP8DspScan[16 + 4 + 4];
|
||||
@ -118,9 +131,10 @@ typedef struct {
|
||||
int max_value;
|
||||
int last_non_zero;
|
||||
} VP8Histogram;
|
||||
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
|
||||
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo);
|
||||
VP8Histogram* WEBP_RESTRICT const histo);
|
||||
extern VP8CHisto VP8CollectHistogram;
|
||||
// General-purpose util function to help VP8CollectHistogram().
|
||||
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
@ -138,8 +152,9 @@ extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
|
||||
extern const uint8_t VP8EncBands[16 + 1];
|
||||
|
||||
struct VP8Residual;
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
|
||||
struct VP8Residual* const res);
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(
|
||||
const int16_t* WEBP_RESTRICT const coeffs,
|
||||
struct VP8Residual* WEBP_RESTRICT const res);
|
||||
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||
|
||||
// Cost calculation function.
|
||||
@ -193,9 +208,11 @@ void VP8SSIMDspInit(void);
|
||||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
|
||||
typedef void (*VP8DecIdct)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
// when doing two transforms, coeffs is actually int16_t[2][16].
|
||||
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
|
||||
typedef void (*VP8DecIdct2)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
extern VP8DecIdct2 VP8Transform;
|
||||
extern VP8DecIdct VP8TransformAC3;
|
||||
extern VP8DecIdct VP8TransformUV;
|
||||
@ -233,7 +250,8 @@ extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
// regular filter (on both macroblock edges and inner edges)
|
||||
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
// on outer edge
|
||||
extern VP8LumaFilterFunc VP8VFilter16;
|
||||
@ -253,8 +271,8 @@ extern VP8ChromaFilterFunc VP8HFilter8i;
|
||||
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
|
||||
#define VP8_DITHER_AMP_BITS 7
|
||||
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
// must be called before anything using the above
|
||||
void VP8DspInit(void);
|
||||
@ -267,10 +285,10 @@ void VP8DspInit(void);
|
||||
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
|
||||
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
|
||||
typedef void (*WebPUpsampleLinePairFunc)(
|
||||
const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
const uint8_t* top_u, const uint8_t* top_v,
|
||||
const uint8_t* cur_u, const uint8_t* cur_v,
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len);
|
||||
const uint8_t* WEBP_RESTRICT top_y, const uint8_t* WEBP_RESTRICT bottom_y,
|
||||
const uint8_t* WEBP_RESTRICT top_u, const uint8_t* WEBP_RESTRICT top_v,
|
||||
const uint8_t* WEBP_RESTRICT cur_u, const uint8_t* WEBP_RESTRICT cur_v,
|
||||
uint8_t* WEBP_RESTRICT top_dst, uint8_t* WEBP_RESTRICT bottom_dst, int len);
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
@ -280,13 +298,15 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
// Per-row point-sampling methods.
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func);
|
||||
|
||||
// Sampling functions to convert rows of YUV to RGB(A)
|
||||
@ -298,9 +318,10 @@ extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
|
||||
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
|
||||
|
||||
// YUV444->RGB converters
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
|
||||
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
|
||||
@ -316,26 +337,35 @@ void WebPInitYUV444Converters(void);
|
||||
// ARGB -> YUV converters
|
||||
|
||||
// Convert ARGB samples to luma Y.
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
|
||||
// even lines and '0' for odd ones. 'src_width' is the original width, not
|
||||
// the U/V one.
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
|
||||
// Convert a row of accumulated (four-values) of rgba32 toward U/V
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
// Convert RGB or BGR to Y
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
|
||||
// used for plain-C fallback.
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
@ -348,8 +378,9 @@ struct WebPRescaler;
|
||||
// Import a row of data and save its contribution in the rescaler.
|
||||
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
|
||||
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
|
||||
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
typedef void (*WebPRescalerImportRowFunc)(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
|
||||
@ -362,16 +393,19 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowExpand_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerImportRowShrink_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRow(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
// Export one row (starting at x_out position) from rescaler.
|
||||
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
|
||||
|
||||
@ -480,8 +514,9 @@ typedef enum { // Filter types.
|
||||
WEBP_FILTER_FAST
|
||||
} WEBP_FILTER_TYPE;
|
||||
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
|
||||
int stride, uint8_t* out);
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out);
|
||||
// In-place un-filtering.
|
||||
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
|
||||
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
|
||||
|
143
src/dsp/enc.c
143
src/dsp/enc.c
@ -59,9 +59,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -109,8 +110,9 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -146,7 +148,9 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
@ -154,7 +158,9 @@ static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
int tmp[16];
|
||||
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
|
||||
@ -184,14 +190,16 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
VP8FTransform(src, ref, out);
|
||||
VP8FTransform(src + 4, ref + 4, out + 16);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
// input is 12b signed
|
||||
int32_t tmp[16];
|
||||
int i;
|
||||
@ -234,8 +242,9 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
int j;
|
||||
if (top != NULL) {
|
||||
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
|
||||
@ -244,8 +253,9 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < size; ++j) {
|
||||
@ -256,8 +266,9 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
int y;
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
@ -286,8 +297,9 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top,
|
||||
static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size, int round, int shift) {
|
||||
int DC = 0;
|
||||
int j;
|
||||
@ -312,8 +324,9 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
@ -332,22 +345,28 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_C(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
TrueMotion(I16TM16 + dst, left, top, 16);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 4x4 prediction
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
|
||||
#define DST(x, y) dst[(x) + (y) * BPS]
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
AVG3(top[ 0], top[1], top[2]),
|
||||
@ -360,7 +379,8 @@ static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -372,14 +392,14 @@ static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -398,7 +418,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -416,7 +436,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -438,7 +458,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -460,7 +480,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -475,7 +495,7 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -498,7 +518,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int x, y;
|
||||
const uint8_t* const clip = clip1 + 255 - top[-1];
|
||||
for (y = 0; y < 4; ++y) {
|
||||
@ -516,7 +536,8 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -529,11 +550,14 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
HU4(I4HU4 + dst, top);
|
||||
}
|
||||
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int w, int h) {
|
||||
int count = 0;
|
||||
int y, x;
|
||||
@ -548,21 +572,25 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 16, 16);
|
||||
}
|
||||
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 16, 8);
|
||||
}
|
||||
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 8, 8);
|
||||
}
|
||||
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 4, 4);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
int k, x, y;
|
||||
for (k = 0; k < 4; ++k) {
|
||||
uint32_t avg = 0;
|
||||
@ -586,7 +614,8 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
int sum = 0;
|
||||
int tmp[16];
|
||||
int i;
|
||||
@ -620,15 +649,17 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -644,13 +675,14 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static const uint8_t kZigzag[16] = {
|
||||
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
||||
};
|
||||
|
||||
// Simple quantization
|
||||
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int last = -1;
|
||||
int n;
|
||||
for (n = 0; n < 16; ++n) {
|
||||
@ -675,9 +707,8 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
return (last >= 0);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -688,7 +719,8 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
//------------------------------------------------------------------------------
|
||||
// Block copy
|
||||
|
||||
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
static WEBP_INLINE void Copy(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int w, int h) {
|
||||
int y;
|
||||
for (y = 0; y < h; ++y) {
|
||||
memcpy(dst, src, w);
|
||||
@ -697,11 +729,13 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
}
|
||||
}
|
||||
|
||||
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy4x4_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
Copy(src, dst, 4, 4);
|
||||
}
|
||||
|
||||
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy16x8_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
Copy(src, dst, 16, 8);
|
||||
}
|
||||
|
||||
@ -760,14 +794,19 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8EncQuantizeBlock = QuantizeBlock_C;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
#endif
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
#endif
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
#endif
|
||||
|
||||
VP8FTransform2 = FTransform2_C;
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_C;
|
||||
VP8Mean16x4 = Mean16x4_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
VP8Copy4x4 = Copy4x4_C;
|
||||
VP8Copy16x8 = Copy16x8_C;
|
||||
|
||||
|
@ -109,9 +109,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
|
||||
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
|
||||
@ -141,8 +141,9 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne_MIPS32(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
|
||||
@ -236,7 +237,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -358,8 +359,9 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
"msub %[temp6], %[temp0] \n\t" \
|
||||
"msub %[temp7], %[temp1] \n\t"
|
||||
|
||||
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int tmp[32];
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
|
||||
@ -393,8 +395,9 @@ static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -475,8 +478,9 @@ static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
|
||||
int temp17, temp18, temp19, temp20;
|
||||
@ -537,7 +541,8 @@ static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
|
||||
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
|
||||
|
||||
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -571,7 +576,8 @@ static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -597,7 +603,8 @@ static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -619,7 +626,8 @@ static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
|
@ -141,8 +141,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int c2217 = 2217;
|
||||
const int c5352 = 5352;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
@ -171,8 +172,9 @@ static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -239,16 +241,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
|
||||
|
||||
@ -314,9 +318,9 @@ static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
return abs(temp3 - temp17) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -367,8 +371,8 @@ static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
} while (0)
|
||||
|
||||
#define VERTICAL_PRED(DST, TOP, SIZE) \
|
||||
static WEBP_INLINE void VerticalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (TOP)) { \
|
||||
static WEBP_INLINE void VerticalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
int j; \
|
||||
if ((TOP)) { \
|
||||
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
|
||||
@ -383,8 +387,8 @@ VERTICAL_PRED(dst, top, 16)
|
||||
#undef VERTICAL_PRED
|
||||
|
||||
#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (LEFT)) { \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) { \
|
||||
if (LEFT) { \
|
||||
int j; \
|
||||
for (j = 0; j < (SIZE); ++j) { \
|
||||
@ -451,8 +455,9 @@ HORIZONTAL_PRED(dst, left, 16)
|
||||
} while (0)
|
||||
|
||||
#define TRUE_MOTION(DST, LEFT, TOP, SIZE) \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* (DST), const uint8_t* (LEFT),\
|
||||
const uint8_t* (TOP)) { \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
|
||||
const uint8_t* WEBP_RESTRICT (LEFT), \
|
||||
const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
if ((LEFT) != NULL) { \
|
||||
if ((TOP) != NULL) { \
|
||||
CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
|
||||
@ -480,8 +485,9 @@ TRUE_MOTION(dst, left, top, 16)
|
||||
#undef CLIP_8B_TO_DST
|
||||
#undef CLIPPING
|
||||
|
||||
static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -543,8 +549,9 @@ static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
|
||||
FILL_8_OR_16(dst, DC, 16);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -588,7 +595,7 @@ static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
|
||||
FILL_8_OR_16(dst, DC, 8);
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], 0(%[top]) \n\t"
|
||||
@ -609,7 +616,7 @@ static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
const int c35 = 0xff00ff;
|
||||
__asm__ volatile (
|
||||
@ -664,7 +671,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -1(%[top]) \n\t"
|
||||
@ -695,7 +702,7 @@ static void VE4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -4(%[top]) \n\t"
|
||||
@ -731,7 +738,7 @@ static void HE4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -780,7 +787,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -830,7 +837,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -877,7 +884,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -926,7 +933,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -974,7 +981,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
__asm__ volatile (
|
||||
"ulw %[temp0], -5(%[top]) \n\t"
|
||||
@ -1013,8 +1020,9 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode8(C8DC8 + dst, left, top);
|
||||
VerticalPred8(C8VE8 + dst, top);
|
||||
@ -1033,8 +1041,9 @@ static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode16(I16DC16 + dst, left, top);
|
||||
VerticalPred16(I16VE16 + dst, top);
|
||||
HorizontalPred16(I16HE16 + dst, left);
|
||||
@ -1043,7 +1052,8 @@ static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -1079,7 +1089,8 @@ static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
GET_SSE_INNER(C) \
|
||||
GET_SSE_INNER(D)
|
||||
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1109,7 +1120,8 @@ static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1131,7 +1143,8 @@ static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1149,7 +1162,8 @@ static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1273,7 +1287,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
"3: \n\t"
|
||||
|
||||
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
|
||||
int sign, coeff, level;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -1314,7 +1328,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -1360,7 +1374,8 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
|
@ -41,8 +41,9 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -69,16 +70,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
// Runs ITransformOne() on the block at (ref, in, dst). When 'do_two' is
// non-zero, also runs it on a second block located 4 bytes further in 'ref'
// and 'dst' and 16 coefficients further in 'in'.
static void ITransform_MSA(const uint8_t* WEBP_RESTRICT ref,
                           const int16_t* WEBP_RESTRICT in,
                           uint8_t* WEBP_RESTRICT dst, int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
  }
}
|
||||
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -131,7 +134,8 @@ static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -168,7 +172,8 @@ static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -200,15 +205,17 @@ static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
// Returns the absolute difference between the TTransform_MSA() results of
// 'a' and 'b' (both evaluated with the weights 'w'), shifted right by 5.
static int Disto4x4_MSA(const uint8_t* WEBP_RESTRICT const a,
                        const uint8_t* WEBP_RESTRICT const b,
                        const uint16_t* WEBP_RESTRICT const w) {
  const int weighted_a = TTransform_MSA(a, w);
  const int weighted_b = TTransform_MSA(b, w);
  const int delta = weighted_b - weighted_a;
  return abs(delta) >> 5;
}
|
||||
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -259,7 +266,9 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top - 1);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -272,7 +281,9 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
SW4(out, out, out, out, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -284,7 +295,8 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
@ -293,7 +305,8 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(dc, dc, dc, dc, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A2 = { 0 };
|
||||
const uint64_t val_m = LD(top - 5);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
|
||||
@ -313,7 +326,8 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val3, val2, val1, val0, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -333,7 +347,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val0, val1, val2, val3, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -354,7 +369,8 @@ static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -375,7 +391,8 @@ static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -390,7 +407,8 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -411,7 +429,8 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16i8 zero = { 0 };
|
||||
const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
|
||||
const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
|
||||
@ -431,7 +450,8 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -451,7 +471,8 @@ static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const v16u8 out = LD_UB(top);
|
||||
STORE16x16(out, dst);
|
||||
@ -461,8 +482,8 @@ static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 16; j += 4) {
|
||||
@ -480,8 +501,9 @@ static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -519,8 +541,9 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC;
|
||||
v16u8 out;
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -548,8 +571,9 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -574,7 +598,8 @@ static void Intra16Preds_MSA(uint8_t* dst,
|
||||
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const uint64_t out = LD(top);
|
||||
STORE8x8(out, dst);
|
||||
@ -584,7 +609,8 @@ static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 8; j += 4) {
|
||||
@ -606,8 +632,9 @@ static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -646,8 +673,9 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint64_t out;
|
||||
v16u8 src = { 0 };
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -670,8 +698,9 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -712,7 +741,8 @@ static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -739,7 +769,8 @@ static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -758,7 +789,8 @@ static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -778,7 +810,8 @@ static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -801,7 +834,7 @@ static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -854,7 +887,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
|
@ -60,8 +60,8 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
const uint8_t* WEBP_RESTRICT const ref,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -120,8 +120,9 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -131,8 +132,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -247,8 +249,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
@ -294,8 +297,9 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
@ -364,8 +368,9 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -484,7 +489,8 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -659,8 +665,9 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -701,8 +708,9 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -715,9 +723,10 @@ static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -747,9 +756,9 @@ static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(
|
||||
const uint8_t* WEBP_RESTRICT const a, const uint8_t* WEBP_RESTRICT const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -775,7 +784,8 @@ static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
@ -784,7 +794,8 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -793,7 +804,8 @@ static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -806,7 +818,8 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -825,8 +838,9 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -860,7 +874,7 @@ static const uint8_t kShuffles[4][8] = {
|
||||
};
|
||||
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
@ -902,7 +916,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -911,6 +925,283 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
#if WEBP_AARCH64
|
||||
|
||||
#if BPS == 32
|
||||
// Builds and stores one 16-byte row:
//  - gathers 16 bytes from the 32-byte table 'qcombined' using the byte
//    indices in 'tbl' ('qcombined' is not a parameter: it must be a
//    uint8x16x2_t visible at the expansion site),
//  - overwrites 32-bit lane 1 (bytes 4..7) of that row with 32-bit lane
//    'lane' of the 8-byte vector 'res',
//  - writes the 16 bytes to 'dst'.
// NOTE(review): judging by the name and the call sites, the row holds the
// DC4/TM4/VE4/HE4 predictors side by side, with 'res' carrying the TM4
// samples -- confirm against the I4xxx offsets.
#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane) \
|
||||
do { \
|
||||
uint8x16_t r; \
|
||||
r = vqtbl2q_u8(qcombined, tbl); \
|
||||
r = vreinterpretq_u8_u32( \
|
||||
vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane), \
|
||||
vreinterpretq_u32_u8(r), 1)); \
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
// Builds and stores one 16-byte row: gathers 16 bytes from the 32-byte table
// 'qcombined' using the byte indices in 'tbl' and writes them to 'dst'.
// 'qcombined' is not a parameter: it must be a uint8x16x2_t visible at the
// expansion site.
#define RD4_VR4_LD4_VL4_NEON(dst, tbl) \
|
||||
do { \
|
||||
uint8x16_t r; \
|
||||
r = vqtbl2q_u8(qcombined, tbl); \
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
// Loads 64 consecutive bytes starting at 'ptr' into four 16-byte vectors.
// Toolchains that pass the version checks below use the single
// vld1q_u8_x4() intrinsic; all others assemble the result from four
// individual vld1q_u8() loads.
static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
#if !(LOCAL_CLANG_PREREQ(3, 4) || LOCAL_GCC_PREREQ(9, 4) || defined(_MSC_VER))
  uint8x16x4_t quad;
  INIT_VECTOR4(quad,
               vld1q_u8(ptr), vld1q_u8(ptr + 16),
               vld1q_u8(ptr + 32), vld1q_u8(ptr + 48));
  return quad;
#else
  return vld1q_u8_x4(ptr);
#endif
}
|
||||
|
||||
// Computes the 4x4 intra predictors named in the section comments below
// (DC4, VE4, HE4, TM4, RD4, VR4, LD4, VL4, HD4, HU4) in one pass and stores
// them as four rows each at dst + I4DC4, dst + I4RD4 and dst + I4HD4.
// 'top' points at the first top sample; the function reads 16 bytes starting
// at top - 5 (left samples, top-left, top and top-right, as laid out in the
// table below).
// Approach: build one 32-byte table 'qcombined' holding the pairwise averages
// (bytes 0..15) and the three-tap averages plus two special values
// (bytes 16..31), then produce every output row with a byte-table lookup.
static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// Byte layout of 'preload' (first row: byte index, second row: sample
// name, third row: offset relative to 'top'):
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13
|
||||
//  L  K  J  I  X  A  B  C  D  E  F  G  H
|
||||
// -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7
|
||||
// Row 0: shuffle producing 'a' (preload shifted up by one byte, first and
//        last entries repeated).
// Row 1: shuffle producing 'full_b' (left samples, each repeated 4 times).
// Rows 2-3: indices into 'qcombined' for the HD4/HU4 output rows
//        (0..15 select avg2_all, 16..31 select avg3_all).
static const uint8_t kLookupTbl1[64] = {
|
||||
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12,
|
||||
3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0,
|
||||
4, 20, 21, 22, 3, 18, 2, 17, 3, 19, 4, 20, 2, 17, 1, 16,
|
||||
2, 18, 3, 19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31
|
||||
};
|
||||
|
||||
// Indices into 'qcombined' for the four RD4/VR4/LD4/VL4 output rows.
static const uint8_t kLookupTbl2[64] = {
|
||||
20, 21, 22, 23, 5, 6, 7, 8, 22, 23, 24, 25, 6, 7, 8, 9,
|
||||
19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
|
||||
18, 19, 20, 21, 19, 5, 6, 7, 24, 25, 26, 27, 7, 8, 9, 26,
|
||||
17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
|
||||
};
|
||||
|
||||
// Indices into 'qcombined' for the four DC4/VE4/HE4/TM4 output rows.
// Index 30 selects the DC value placed in avg3_all lane 14 below; the
// entries at positions 4..7 of each row are placeholders, since those bytes
// are overwritten by DC4_VE4_HE4_TM4_NEON with the TM4 result.
static const uint8_t kLookupTbl3[64] = {
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16
|
||||
};
|
||||
|
||||
const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
|
||||
const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
|
||||
const uint8x16x4_t lookup_avgs3 = Vld1qU8x4(kLookupTbl3);
|
||||
|
||||
// 16 samples starting at top[-5] (see the layout table above).
const uint8x16_t preload = vld1q_u8(top - 5);
|
||||
// Name is referenced directly by the two row-building macros.
uint8x16x2_t qcombined;
|
||||
uint8x16_t result0, result1;
|
||||
|
||||
// a[i] = preload[i - 1] (a[0] = preload[0]), b[i] = preload[i],
// c[i] = a[i + 2] = preload[i + 1] for the lanes that do not wrap.
uint8x16_t a = vqtbl1q_u8(preload, lookup_avgs1.val[0]);
|
||||
uint8x16_t b = preload;
|
||||
uint8x16_t c = vextq_u8(a, a, 2);
|
||||
|
||||
// Three-tap average of each sample with its two neighbours: halving add of
// (a, c) followed by a rounding halving add with b.
uint8x16_t avg3_all = vrhaddq_u8(vhaddq_u8(a, c), b);
|
||||
// Rounded average of each sample with its predecessor.
uint8x16_t avg2_all = vrhaddq_u8(a, b);
|
||||
|
||||
uint8x8_t preload_x8, sub_a, sub_c;
|
||||
uint8_t result_u8;
|
||||
uint8x8_t res_lo, res_hi;
|
||||
uint8x16_t full_b;
|
||||
uint16x8_t sub, sum_lo, sum_hi;
|
||||
|
||||
// Low half of c is preload[1..8] = K J I X A B C D; replacing lane 3 (X)
// with preload[0] (L) leaves the four left and the four top samples.
preload_x8 = vget_low_u8(c);
|
||||
preload_x8 = vset_lane_u8(vgetq_lane_u8(preload, 0), preload_x8, 3);
|
||||
|
||||
// DC value: rounded mean of those eight samples.
result_u8 = (vaddlv_u8(preload_x8) + 4) >> 3;
|
||||
|
||||
// Stash two special values in the otherwise unused top lanes of avg3_all so
// the lookup tables can reach them: table index 31 = L, index 30 = DC.
avg3_all = vsetq_lane_u8(vgetq_lane_u8(preload, 0), avg3_all, 15);
|
||||
avg3_all = vsetq_lane_u8(result_u8, avg3_all, 14);
|
||||
|
||||
qcombined.val[0] = avg2_all;
|
||||
qcombined.val[1] = avg3_all;
|
||||
|
||||
// TM4: (top sample - top-left) + left sample, saturated to [0, 255].
// sub_a = top-left sample X (preload lane 4) in all eight lanes.
sub_a = vdup_laneq_u8(preload, 4);
|
||||
|
||||
// full_b = preload[3], [2], [1], [0], each repeated four times
// (i.e. {I,I,I,I, J,J,J,J, K,K,K,K, L,L,L,L}).
|
||||
full_b = vqtbl1q_u8(preload, lookup_avgs1.val[1]);
|
||||
// sub_c = preload[5..8] (top samples A,B,C,D) repeated twice:
// {A,B,C,D, A,B,C,D}.
|
||||
sub_c = vreinterpret_u8_u32(vdup_n_u32(
|
||||
vgetq_lane_u32(vreinterpretq_u32_u8(vextq_u8(preload, preload, 5)), 0)));
|
||||
|
||||
// Widening subtract; negative differences wrap in 16 bits and are handled by
// the signed reinterpretation before the saturating narrow below.
sub = vsubl_u8(sub_c, sub_a);
|
||||
// res_lo: TM4 rows for left samples I (32-bit lane 0) and J (lane 1).
sum_lo = vaddw_u8(sub, vget_low_u8(full_b));
|
||||
res_lo = vqmovun_s16(vreinterpretq_s16_u16(sum_lo));
|
||||
|
||||
// res_hi: TM4 rows for left samples K (32-bit lane 0) and L (lane 1).
sum_hi = vaddw_u8(sub, vget_high_u8(full_b));
|
||||
res_hi = vqmovun_s16(vreinterpretq_s16_u16(sum_hi));
|
||||
|
||||
// DC4, VE4, HE4, TM4
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 0, lookup_avgs3.val[0], res_lo, 0);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 1, lookup_avgs3.val[1], res_lo, 1);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 2, lookup_avgs3.val[2], res_hi, 0);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 3, lookup_avgs3.val[3], res_hi, 1);
|
||||
|
||||
// RD4, VR4, LD4, VL4
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 0, lookup_avgs2.val[0]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 1, lookup_avgs2.val[1]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 2, lookup_avgs2.val[2]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 3, lookup_avgs2.val[3]);
|
||||
|
||||
// HD4, HU4
// Each 16-byte lookup yields two 8-byte output rows (low half, high half).
|
||||
result0 = vqtbl2q_u8(qcombined, lookup_avgs1.val[2]);
|
||||
result1 = vqtbl2q_u8(qcombined, lookup_avgs1.val[3]);
|
||||
|
||||
vst1_u8(dst + I4HD4 + BPS * 0, vget_low_u8(result0));
|
||||
vst1_u8(dst + I4HD4 + BPS * 1, vget_high_u8(result0));
|
||||
vst1_u8(dst + I4HD4 + BPS * 2, vget_low_u8(result1));
|
||||
vst1_u8(dst + I4HD4 + BPS * 3, vget_high_u8(result1));
|
||||
}
|
||||
#endif // BPS == 32
|
||||
|
||||
static WEBP_INLINE void Fill_NEON(uint8_t* dst, const uint8_t value) {
|
||||
uint8x16_t a = vdupq_n_u8(value);
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
vst1q_u8(dst + BPS * i, a);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Fill16_NEON(uint8_t* dst, const uint8_t* src) {
|
||||
uint8x16_t a = vld1q_u8(src);
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
vst1q_u8(dst + BPS * i, a);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16_NEON(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
uint8x16_t a;
|
||||
|
||||
if (left == NULL) {
|
||||
Fill_NEON(dst, 129);
|
||||
return;
|
||||
}
|
||||
|
||||
a = vld1q_u8(left + 0);
|
||||
vst1q_u8(dst + BPS * 0, vdupq_laneq_u8(a, 0));
|
||||
vst1q_u8(dst + BPS * 1, vdupq_laneq_u8(a, 1));
|
||||
vst1q_u8(dst + BPS * 2, vdupq_laneq_u8(a, 2));
|
||||
vst1q_u8(dst + BPS * 3, vdupq_laneq_u8(a, 3));
|
||||
vst1q_u8(dst + BPS * 4, vdupq_laneq_u8(a, 4));
|
||||
vst1q_u8(dst + BPS * 5, vdupq_laneq_u8(a, 5));
|
||||
vst1q_u8(dst + BPS * 6, vdupq_laneq_u8(a, 6));
|
||||
vst1q_u8(dst + BPS * 7, vdupq_laneq_u8(a, 7));
|
||||
vst1q_u8(dst + BPS * 8, vdupq_laneq_u8(a, 8));
|
||||
vst1q_u8(dst + BPS * 9, vdupq_laneq_u8(a, 9));
|
||||
vst1q_u8(dst + BPS * 10, vdupq_laneq_u8(a, 10));
|
||||
vst1q_u8(dst + BPS * 11, vdupq_laneq_u8(a, 11));
|
||||
vst1q_u8(dst + BPS * 12, vdupq_laneq_u8(a, 12));
|
||||
vst1q_u8(dst + BPS * 13, vdupq_laneq_u8(a, 13));
|
||||
vst1q_u8(dst + BPS * 14, vdupq_laneq_u8(a, 14));
|
||||
vst1q_u8(dst + BPS * 15, vdupq_laneq_u8(a, 15));
|
||||
}
|
||||
|
||||
// Vertical 16x16 prediction: every row of 'dst' is a copy of the 16 'top'
// samples. If 'top' is NULL the whole block is set to 127 instead.
static WEBP_INLINE void VerticalPred16_NEON(uint8_t* dst, const uint8_t* top) {
  if (top == NULL) {
    Fill_NEON(dst, 127);
    return;
  }
  Fill16_NEON(dst, top);
}
|
||||
|
||||
// DC 16x16 prediction: fills 'dst' with a single value derived from the
// available edges.
//  - 'top' and 'left' present: rounding shift by 5 of the sum of all 32
//    samples.
//  - only one edge present: rounding shift by 4 of the sum of its 16 samples.
//  - neither present: 0x80.
static WEBP_INLINE void DCMode_NEON(uint8_t* dst, const uint8_t* left,
                                    const uint8_t* top) {
  uint8_t dc_value;

  if (top != NULL && left != NULL) {
    const uint16_t sum =
        vaddlvq_u8(vld1q_u8(top)) + vaddlvq_u8(vld1q_u8(left));
    dc_value = vqrshrnh_n_u16(sum, 5);
  } else if (top != NULL || left != NULL) {
    // Exactly one edge is available; both cases use the same computation.
    const uint8_t* const edge = (top != NULL) ? top : left;
    const uint16_t sum = vaddlvq_u8(vld1q_u8(edge));
    dc_value = vqrshrnh_n_u16(sum, 4);
  } else {
    // No top, no left: fall back to the fixed value.
    dc_value = 0x80;
  }
  Fill_NEON(dst, dc_value);
}
|
||||
|
||||
static WEBP_INLINE void TrueMotionHelper_NEON(uint8_t* dst,
|
||||
const uint8x8_t outer,
|
||||
const uint8x8x2_t inner,
|
||||
const uint16x8_t a, int i,
|
||||
const int n) {
|
||||
uint8x8_t d1, d2;
|
||||
uint16x8_t r1, r2;
|
||||
|
||||
r1 = vaddl_u8(outer, inner.val[0]);
|
||||
r1 = vqsubq_u16(r1, a);
|
||||
d1 = vqmovun_s16(vreinterpretq_s16_u16(r1));
|
||||
r2 = vaddl_u8(outer, inner.val[1]);
|
||||
r2 = vqsubq_u16(r2, a);
|
||||
d2 = vqmovun_s16(vreinterpretq_s16_u16(r2));
|
||||
vst1_u8(dst + BPS * (i * 4 + n), d1);
|
||||
vst1_u8(dst + BPS * (i * 4 + n) + 8, d2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
int i;
|
||||
uint16x8_t a;
|
||||
uint8x8x2_t inner;
|
||||
|
||||
if (left == NULL) {
|
||||
// True motion without left samples (hence: with default 129 value) is
|
||||
// equivalent to VE prediction where you just copy the top samples.
|
||||
// Note that if top samples are not available, the default value is then
|
||||
// 129, and not 127 as in the VerticalPred case.
|
||||
if (top != NULL) {
|
||||
VerticalPred16_NEON(dst, top);
|
||||
} else {
|
||||
Fill_NEON(dst, 129);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// left is not NULL.
|
||||
if (top == NULL) {
|
||||
HorizontalPred16_NEON(dst, left);
|
||||
return;
|
||||
}
|
||||
|
||||
// Neither left nor top are NULL.
|
||||
a = vdupq_n_u16(left[-1]);
|
||||
inner = vld1_u8_x2(top);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
const uint8x8x4_t outer = vld4_dup_u8(&left[i * 4]);
|
||||
|
||||
TrueMotionHelper_NEON(dst, outer.val[0], inner, a, i, 0);
|
||||
TrueMotionHelper_NEON(dst, outer.val[1], inner, a, i, 1);
|
||||
TrueMotionHelper_NEON(dst, outer.val[2], inner, a, i, 2);
|
||||
TrueMotionHelper_NEON(dst, outer.val[3], inner, a, i, 3);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the four 16x16 luma predictions (DC, vertical, horizontal and
// TrueMotion) from the 'left' and 'top' samples and writes each one at its
// own I16*16 offset inside 'dst'.
static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
                              const uint8_t* WEBP_RESTRICT left,
                              const uint8_t* WEBP_RESTRICT top) {
  uint8_t* const dc_dst = dst + I16DC16;
  uint8_t* const ve_dst = dst + I16VE16;
  uint8_t* const he_dst = dst + I16HE16;
  uint8_t* const tm_dst = dst + I16TM16;

  DCMode_NEON(dc_dst, left, top);
  VerticalPred16_NEON(ve_dst, top);
  HorizontalPred16_NEON(he_dst, left);
  TrueMotion_NEON(tm_dst, left, top);
}
|
||||
|
||||
#endif // WEBP_AARCH64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
@ -931,9 +1222,17 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8SSE8x8 = SSE8x8_NEON;
|
||||
VP8SSE4x4 = SSE4x4_NEON;
|
||||
|
||||
#if WEBP_AARCH64
|
||||
#if BPS == 32
|
||||
VP8EncPredLuma4 = Intra4Preds_NEON;
|
||||
#endif
|
||||
VP8EncPredLuma16 = Intra16Preds_NEON;
|
||||
#endif
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8EncQuantizeBlock = QuantizeBlock_NEON;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_NEON;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -26,8 +26,9 @@
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one inverse transform.
|
||||
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -177,8 +178,9 @@ static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does two inverse transforms.
|
||||
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -316,7 +318,9 @@ static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int do_two) {
|
||||
if (do_two) {
|
||||
ITransform_Two_SSE2(ref, in, dst);
|
||||
@ -373,7 +377,7 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -424,8 +428,9 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -468,8 +473,9 @@ static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -517,7 +523,8 @@ static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
|
||||
__m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -533,7 +540,8 @@ static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
@ -566,9 +574,10 @@ static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -640,7 +649,8 @@ static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -648,7 +658,8 @@ static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -656,8 +667,9 @@ static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv_SSE2(dst, top);
|
||||
@ -669,7 +681,8 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -678,7 +691,8 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -687,8 +701,9 @@ static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv_SSE2(dst, left);
|
||||
@ -700,8 +715,9 @@ static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -728,8 +744,10 @@ static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM_SSE2(dst, left, top, size);
|
||||
@ -749,8 +767,9 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
@ -758,7 +777,8 @@ static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
@ -766,7 +786,8 @@ static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -775,8 +796,9 @@ static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
@ -790,8 +812,9 @@ static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
@ -799,13 +822,15 @@ static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -814,8 +839,9 @@ static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16_SSE2(dst, left, top);
|
||||
@ -844,8 +870,9 @@ static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -861,8 +888,9 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -874,15 +902,17 @@ static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
// Down-Left
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -898,8 +928,9 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
// Vertical-Right
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -924,8 +955,9 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
// Vertical-Left
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -951,8 +983,9 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
// Down-right
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -968,7 +1001,8 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -983,7 +1017,8 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -1006,7 +1041,8 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -1028,7 +1064,8 @@ static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
@ -1044,8 +1081,9 @@ static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
@ -1064,8 +1102,9 @@ static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
@ -1092,7 +1131,8 @@ static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
@ -1114,18 +1154,21 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1152,7 +1195,8 @@ static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1189,7 +1233,7 @@ static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1227,8 +1271,9 @@ static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
|
||||
const uint8_t* WEBP_RESTRICT inB,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1328,14 +1373,16 @@ static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -1350,9 +1397,10 @@ static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const uint16_t* WEBP_RESTRICT const sharpen,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1463,17 +1511,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
@ -23,9 +23,10 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms.
|
||||
|
||||
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -168,14 +169,16 @@ static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int diff_sum = TTransform_SSE41(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -301,17 +304,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
#undef PSHUFB_CST
|
||||
|
||||
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
@ -23,55 +23,42 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
if (inverse) {
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] + pred[i]);
|
||||
} else {
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
|
||||
}
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
PredictLine_C(in, preds - stride, out, 1);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -81,35 +68,23 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
} else {
|
||||
// We are starting from in-between. Make sure 'preds' points to prev row.
|
||||
preds -= stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLine_C(in, preds, out, width, inverse);
|
||||
++row;
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLine_C(in, preds, out, width);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -126,40 +101,31 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
int w;
|
||||
// leftmost pixel: predict from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in, preds - stride, out, 1);
|
||||
for (w = 1; w < width; ++w) {
|
||||
const int pred = GradientPredictor_C(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
out[w] = (uint8_t)(in[w] + (inverse ? pred : -pred));
|
||||
out[w] = (uint8_t)(in[w] - pred);
|
||||
}
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -172,20 +138,22 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
|
@ -26,13 +26,12 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do { \
|
||||
@ -103,7 +102,8 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
@ -184,99 +184,75 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
// Horizontal filter.
|
||||
|
||||
#define FILTER_LINE_BY_LINE do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
|
||||
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
|
||||
++row; \
|
||||
preds += stride; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE;
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
#define FILTER_LINE_BY_LINE do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
|
||||
++row; \
|
||||
preds += stride; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
} else {
|
||||
// We are starting from in-between. Make sure 'preds' points to prev row.
|
||||
preds -= stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE;
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -297,7 +273,7 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
int w; \
|
||||
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
|
||||
for (w = 1; w < width; ++w) { \
|
||||
@ -306,42 +282,34 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
PREDS[w - stride - 1]); \
|
||||
out[w] = in[w] OPERATION pred; \
|
||||
} \
|
||||
++row; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE(in, -);
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -21,7 +21,8 @@
|
||||
|
||||
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
v16u8 src0, pred0, dst0;
|
||||
assert(length >= 0);
|
||||
while (length >= 32) {
|
||||
@ -58,8 +59,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -68,8 +70,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter
|
||||
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -99,8 +102,8 @@ static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
|
||||
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
const uint8_t* ppred,
|
||||
uint8_t* poutput, int stride,
|
||||
int size) {
|
||||
uint8_t* WEBP_RESTRICT poutput,
|
||||
int stride, int size) {
|
||||
int w;
|
||||
const v16i8 zero = { 0 };
|
||||
while (size >= 16) {
|
||||
@ -131,8 +134,9 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -159,8 +163,9 @@ static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
|
@ -23,13 +23,12 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
// load eight u8 and widen to s16
|
||||
@ -46,7 +45,7 @@
|
||||
#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
|
||||
|
||||
static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
assert(length >= 0);
|
||||
for (i = 0; i + 16 <= length; i += 16) {
|
||||
@ -59,86 +58,70 @@ static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
PredictLine_NEON(src, src - 1, dst, length);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLine_NEON(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -151,7 +134,8 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= length; i += 8) {
|
||||
const uint8x8_t A = vld1_u8(&row[i - 1]);
|
||||
@ -167,40 +151,31 @@ static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
out[0] = in[0] - in[-stride];
|
||||
GradientPredictDirect_NEON(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
#undef DCHECK
|
||||
|
@ -27,15 +27,15 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -53,7 +53,8 @@ static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -73,32 +74,23 @@ static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -107,30 +99,22 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLineTop_SSE2(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -146,7 +130,8 @@ static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -170,30 +155,22 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
out[0] = (uint8_t)(in[0] - in[-stride]);
|
||||
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -203,20 +180,22 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -13,15 +13,21 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Image transforms.
|
||||
@ -107,14 +113,14 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
static uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
static uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
return *left;
|
||||
}
|
||||
@ -182,13 +188,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
}
|
||||
|
||||
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int x;
|
||||
(void)upper;
|
||||
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
|
||||
}
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
(void)upper;
|
||||
@ -441,8 +447,8 @@ static int is_big_endian(void) {
|
||||
return (tmp.b[0] != 1);
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -452,8 +458,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -464,8 +470,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -481,8 +487,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -498,8 +504,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -509,8 +515,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
int swap_on_big_endian) {
|
||||
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
|
||||
if (is_big_endian() == swap_on_big_endian) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
@ -571,16 +577,21 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
|
||||
VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
// exposed plain-C implementations
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
|
||||
|
||||
VP8LTransformColorInverseFunc VP8LTransformColorInverse;
|
||||
VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
|
||||
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
||||
VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
@ -591,6 +602,7 @@ VP8LMapAlphaFunc VP8LMapColor8b;
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8LDspInitSSE2(void);
|
||||
extern void VP8LDspInitSSE41(void);
|
||||
extern void VP8LDspInitAVX2(void);
|
||||
extern void VP8LDspInitNEON(void);
|
||||
extern void VP8LDspInitMIPSdspR2(void);
|
||||
extern void VP8LDspInitMSA(void);
|
||||
@ -643,6 +655,11 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8LDspInitSSE41();
|
||||
#if defined(WEBP_HAVE_AVX2)
|
||||
if (VP8GetCPUInfo(kAVX2)) {
|
||||
VP8LDspInitAVX2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "src/webp/types.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -32,10 +33,6 @@ typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
extern VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
@ -64,13 +61,15 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
const uint32_t* upper, int num_pixels,
|
||||
uint32_t* out);
|
||||
uint32_t* WEBP_RESTRICT out);
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
|
||||
|
||||
typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
|
||||
int num_pixels, uint32_t* dst);
|
||||
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
|
||||
|
||||
typedef struct {
|
||||
// Note: the members are uint8_t, so that any negative values are
|
||||
@ -83,6 +82,7 @@ typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
|
||||
const uint32_t* src,
|
||||
int num_pixels, uint32_t* dst);
|
||||
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
|
||||
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
|
||||
|
||||
struct VP8LTransform; // Defined in dec/vp8li.h.
|
||||
|
||||
@ -95,13 +95,15 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
|
||||
const uint32_t* const in, uint32_t* const out);
|
||||
|
||||
// Color space conversion.
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst);
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
|
||||
|
||||
// Converts from BGRA to other color spaces.
|
||||
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
|
||||
@ -131,13 +133,16 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
@ -149,48 +154,55 @@ void VP8LDspInit(void);
|
||||
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
|
||||
typedef void (*VP8LTransformColorFunc)(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
|
||||
int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
extern VP8LTransformColorFunc VP8LTransformColor_SSE;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, int histo[]);
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]);
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
|
||||
|
||||
typedef void (*VP8LCollectColorRedTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]);
|
||||
int green_to_red, uint32_t histo[]);
|
||||
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels);
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]);
|
||||
uint32_t histo[]);
|
||||
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
|
||||
const int Y[256]);
|
||||
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
||||
const uint32_t Y[256]);
|
||||
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
|
||||
|
||||
extern VP8LCostFunc VP8LExtraCost;
|
||||
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
typedef struct { // small struct to hold counters
|
||||
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
||||
@ -198,7 +210,7 @@ typedef struct { // small struct to hold counters
|
||||
} VP8LStreaks;
|
||||
|
||||
typedef struct { // small struct to hold bit entropy results
|
||||
float entropy; // entropy
|
||||
uint64_t entropy; // entropy
|
||||
uint32_t sum; // sum of the population
|
||||
int nonzeros; // number of non-zero elements in the population
|
||||
uint32_t max_val; // maximum value in the population
|
||||
@ -212,26 +224,30 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
|
||||
// codec specific heuristics.
|
||||
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
|
||||
// Get the entropy for the distribution 'X'.
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats);
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy);
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy);
|
||||
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
|
||||
uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorFunc VP8LAddVector;
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
@ -241,11 +257,13 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
|
||||
// Returns the first index where array1 and array2 are different.
|
||||
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
|
||||
int xbits, uint32_t* dst);
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst);
|
||||
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst);
|
||||
extern VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
|
||||
|
||||
// Must be called before calling any of the above methods.
|
||||
void VP8LEncDspInit(void);
|
||||
|
442
src/dsp/lossless_avx2.c
Normal file
442
src/dsp/lossless_avx2.c
Normal file
@ -0,0 +1,442 @@
|
||||
// Copyright 2025 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// AVX2 variant of methods for lossless decoder
|
||||
//
|
||||
// Author: Vincent Rabaud (vrabaud@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_AVX2)
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictor Transform
|
||||
|
||||
// Stores in '*avg' the per-byte average of '*a0' and '*a1', rounded down.
static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
                                       const __m256i* const a1,
                                       __m256i* const avg) {
  // The averaging instruction yields (a + b + 1) >> 1. The extra '+ 1' only
  // changes the result when a + b is odd, that is when the lowest bits of a
  // and b differ. Hence:
  //   (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
  const __m256i lhs = *a0;
  const __m256i rhs = *a1;
  const __m256i rounded_up = _mm256_avg_epu8(lhs, rhs);
  const __m256i differing_bits = _mm256_xor_si256(lhs, rhs);
  const __m256i correction =
      _mm256_and_si256(differing_bits, _mm256_set1_epi8(1));
  *avg = _mm256_sub_epi8(rounded_up, correction);
}
|
||||
|
||||
// Batch versions of those functions.
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_AVX2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
  // The prediction is the constant ARGB_BLACK: 'upper' is never read.
  const __m256i kBlack = _mm256_set1_epi32((int)ARGB_BLACK);
  int x = 0;
  (void)upper;
  // 8 pixels (256 bits) are processed per iteration, with a per-byte
  // (i.e. per-channel, modulo 256) addition.
  while (x + 8 <= num_pixels) {
    const __m256i residuals = _mm256_loadu_si256((const __m256i*)(in + x));
    _mm256_storeu_si256((__m256i*)(out + x),
                        _mm256_add_epi8(residuals, kBlack));
    x += 8;
  }
  // Fewer than 8 pixels left: forward them to VP8LPredictorsAdd_SSE[0].
  if (x != num_pixels) {
    VP8LPredictorsAdd_SSE[0](in + x, NULL, num_pixels - x, out + x);
  }
}
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_AVX2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
  // Permutation indices selecting 32-bit element 7 (the last pixel of a
  // batch) for each of the 8 outputs.
  const __m256i kLastPixel = _mm256_set1_epi32(7);
  // Left neighbor of the next batch, replicated over the 8 elements.
  // out[-1] must be readable.
  __m256i left = _mm256_set1_epi32((int)out[-1]);
  int x;
  for (x = 0; x + 8 <= num_pixels; x += 8) {
    // The 8 residuals are noted: h | g | f | e | d | c | b | a
    const __m256i src = _mm256_loadu_si256((const __m256i*)(in + x));
    // The byte shifts operate on each 128-bit lane separately, so prefix sums
    // are first computed independently in the two lanes.
    // src shifted by one pixel: g | f | e | 0 | c | b | a | 0
    // g + h | f + g | e + f | e | c + d | b + c | a + b | a
    const __m256i pairs = _mm256_add_epi8(src, _mm256_slli_si256(src, 4));
    // pairs shifted by two pixels: e + f | e | 0 | 0 | a + b | a | 0 | 0
    // e + f + g + h | e + f + g | e + f | e | a + b + c + d | a + b + c | a + b
    // | a
    const __m256i lane_sums =
        _mm256_add_epi8(pairs, _mm256_slli_si256(pairs, 8));
    // Add the total of the lower lane (a + b + c + d) to the upper lane.
    const int32_t sum_abcd = _mm256_extract_epi32(lane_sums, 3);
    const __m256i carry = _mm256_setr_epi32(0, 0, 0, 0, sum_abcd, sum_abcd,
                                            sum_abcd, sum_abcd);
    const __m256i prefix = _mm256_add_epi8(lane_sums, carry);

    const __m256i res = _mm256_add_epi8(prefix, left);
    _mm256_storeu_si256((__m256i*)(out + x), res);
    // The last output pixel is the left neighbor of the next batch.
    left = _mm256_permutevar8x32_epi32(res, kLastPixel);
  }
  // Fewer than 8 pixels left: forward them to VP8LPredictorsAdd_SSE[1].
  if (x != num_pixels) {
    VP8LPredictorsAdd_SSE[1](in + x, upper + x, num_pixels - x, out + x);
  }
}
|
||||
|
||||
// Macro that adds 32-bit integers from IN using mod 256 arithmetic
|
||||
// per 8 bit channel.
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_AVX2(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) { \
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
|
||||
const __m256i other = _mm256_loadu_si256((const __m256i*)&(IN)); \
|
||||
const __m256i res = _mm256_add_epi8(src, other); \
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Predictor2: Top.
|
||||
GENERATE_PREDICTOR_1(2, upper[i])
|
||||
// Predictor3: Top-right.
|
||||
GENERATE_PREDICTOR_1(3, upper[i + 1])
|
||||
// Predictor4: Top-left.
|
||||
GENERATE_PREDICTOR_1(4, upper[i - 1])
|
||||
#undef GENERATE_PREDICTOR_1
|
||||
|
||||
// Due to averages with integers, values cannot be accumulated in parallel for
|
||||
// predictors 5 to 7.
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_AVX2(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) { \
|
||||
const __m256i Tother = _mm256_loadu_si256((const __m256i*)&(IN)); \
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]); \
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
|
||||
__m256i avg, res; \
|
||||
Average2_m256i(&T, &Tother, &avg); \
|
||||
res = _mm256_add_epi8(avg, src); \
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
// Predictor8: average TL T.
|
||||
GENERATE_PREDICTOR_2(8, upper[i - 1])
|
||||
// Predictor9: average T TR.
|
||||
GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
#undef GENERATE_PREDICTOR_2
|
||||
|
||||
// Predictor10: average of (average of (L,TL), average of (T, TR)).
|
||||
#define DO_PRED10(OUT) \
|
||||
do { \
|
||||
__m256i avgLTL, avg; \
|
||||
Average2_m256i(&L, &TL, &avgLTL); \
|
||||
Average2_m256i(&avgTTR, &avgLTL, &avg); \
|
||||
L = _mm256_add_epi8(avg, src); \
|
||||
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED10_SHIFT \
|
||||
do { \
|
||||
/* Rotate the pre-computed values for the next iteration.*/ \
|
||||
avgTTR = _mm256_srli_si256(avgTTR, 4); \
|
||||
TL = _mm256_srli_si256(TL, 4); \
|
||||
src = _mm256_srli_si256(src, 4); \
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i, j;
|
||||
__m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
__m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
|
||||
__m256i avgTTR;
|
||||
Average2_m256i(&T, &TR, &avgTTR);
|
||||
{
|
||||
const __m256i avgTTR_bak = avgTTR;
|
||||
const __m256i TL_bak = TL;
|
||||
const __m256i src_bak = src;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
DO_PRED10(j);
|
||||
DO_PRED10_SHIFT;
|
||||
}
|
||||
avgTTR = _mm256_permute2x128_si256(avgTTR_bak, avgTTR_bak, 1);
|
||||
TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
|
||||
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
|
||||
for (; j < 8; ++j) {
|
||||
DO_PRED10(j);
|
||||
DO_PRED10_SHIFT;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_SSE[10](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED10
|
||||
#undef DO_PRED10_SHIFT
|
||||
|
||||
// Predictor11: select.
|
||||
#define DO_PRED11(OUT) \
|
||||
do { \
|
||||
const __m256i L_lo = _mm256_unpacklo_epi32(L, T); \
|
||||
const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T); \
|
||||
const __m256i pb = _mm256_sad_epu8(L_lo, TL_lo); /* pb = sum |L-TL|*/ \
|
||||
const __m256i mask = _mm256_cmpgt_epi32(pb, pa); \
|
||||
const __m256i A = _mm256_and_si256(mask, L); \
|
||||
const __m256i B = _mm256_andnot_si256(mask, T); \
|
||||
const __m256i pred = _mm256_or_si256(A, B); /* pred = (pa > b)? L : T*/ \
|
||||
L = _mm256_add_epi8(src, pred); \
|
||||
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED11_SHIFT \
|
||||
do { \
|
||||
/* Shift the pre-computed value for the next iteration.*/ \
|
||||
T = _mm256_srli_si256(T, 4); \
|
||||
TL = _mm256_srli_si256(TL, 4); \
|
||||
src = _mm256_srli_si256(src, 4); \
|
||||
pa = _mm256_srli_si256(pa, 4); \
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i, j;
|
||||
__m256i pa;
|
||||
__m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
__m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
__m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
{
|
||||
// We can unpack with any value on the upper 32 bits, provided it's the
|
||||
// same on both operands (so that their sum of abs diff is zero). Here we
|
||||
// use T.
|
||||
const __m256i T_lo = _mm256_unpacklo_epi32(T, T);
|
||||
const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T);
|
||||
const __m256i T_hi = _mm256_unpackhi_epi32(T, T);
|
||||
const __m256i TL_hi = _mm256_unpackhi_epi32(TL, T);
|
||||
const __m256i s_lo = _mm256_sad_epu8(T_lo, TL_lo);
|
||||
const __m256i s_hi = _mm256_sad_epu8(T_hi, TL_hi);
|
||||
pa = _mm256_packs_epi32(s_lo, s_hi); // pa = sum |T-TL|
|
||||
}
|
||||
{
|
||||
const __m256i T_bak = T;
|
||||
const __m256i TL_bak = TL;
|
||||
const __m256i src_bak = src;
|
||||
const __m256i pa_bak = pa;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
DO_PRED11(j);
|
||||
DO_PRED11_SHIFT;
|
||||
}
|
||||
T = _mm256_permute2x128_si256(T_bak, T_bak, 1);
|
||||
TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
|
||||
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
|
||||
pa = _mm256_permute2x128_si256(pa_bak, pa_bak, 1);
|
||||
for (; j < 8; ++j) {
|
||||
DO_PRED11(j);
|
||||
DO_PRED11_SHIFT;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_SSE[11](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED11
|
||||
#undef DO_PRED11_SHIFT
|
||||
|
||||
// Predictor12: ClampedAddSubtractFull.
|
||||
#define DO_PRED12(DIFF, OUT) \
|
||||
do { \
|
||||
const __m256i all = _mm256_add_epi16(L, (DIFF)); \
|
||||
const __m256i alls = _mm256_packus_epi16(all, all); \
|
||||
const __m256i res = _mm256_add_epi8(src, alls); \
|
||||
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(res); \
|
||||
L = _mm256_unpacklo_epi8(res, zero); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED12_SHIFT(DIFF, LANE) \
|
||||
do { \
|
||||
/* Shift the pre-computed value for the next iteration.*/ \
|
||||
if ((LANE) == 0) (DIFF) = _mm256_srli_si256(DIFF, 8); \
|
||||
src = _mm256_srli_si256(src, 4); \
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
const __m256i L8 = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
|
||||
__m256i L = _mm256_unpacklo_epi8(L8, zero);
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
// Load 8 pixels at a time.
|
||||
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
|
||||
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
|
||||
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
|
||||
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
|
||||
__m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);
|
||||
__m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
|
||||
const __m256i diff_lo_bak = diff_lo;
|
||||
const __m256i diff_hi_bak = diff_hi;
|
||||
const __m256i src_bak = src;
|
||||
DO_PRED12(diff_lo, 0);
|
||||
DO_PRED12_SHIFT(diff_lo, 0);
|
||||
DO_PRED12(diff_lo, 1);
|
||||
DO_PRED12_SHIFT(diff_lo, 0);
|
||||
DO_PRED12(diff_hi, 2);
|
||||
DO_PRED12_SHIFT(diff_hi, 0);
|
||||
DO_PRED12(diff_hi, 3);
|
||||
DO_PRED12_SHIFT(diff_hi, 0);
|
||||
|
||||
// Process the upper lane.
|
||||
diff_lo = _mm256_permute2x128_si256(diff_lo_bak, diff_lo_bak, 1);
|
||||
diff_hi = _mm256_permute2x128_si256(diff_hi_bak, diff_hi_bak, 1);
|
||||
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
|
||||
|
||||
DO_PRED12(diff_lo, 4);
|
||||
DO_PRED12_SHIFT(diff_lo, 0);
|
||||
DO_PRED12(diff_lo, 5);
|
||||
DO_PRED12_SHIFT(diff_lo, 1);
|
||||
DO_PRED12(diff_hi, 6);
|
||||
DO_PRED12_SHIFT(diff_hi, 0);
|
||||
DO_PRED12(diff_hi, 7);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_SSE[12](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED12
|
||||
#undef DO_PRED12_SHIFT
|
||||
|
||||
// Due to averages with integers, values cannot be accumulated in parallel for
|
||||
// predictor 13.
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
|
||||
// Inverse of the subtract-green transform: for each of the num_pixels pixels
// in src[], adds the green byte to the red and blue bytes (modulo 256) and
// stores the pixel in dst[]. Alpha and green are left unchanged. Pixels are
// processed eight at a time; any remainder is delegated to
// VP8LAddGreenToBlueAndRed_SSE.
static void AddGreenToBlueAndRed_AVX2(const uint32_t* const src, int num_pixels,
                                      uint32_t* dst) {
  int i;
  // For each 32-bit pixel, copy byte 1 into bytes 0 and 2 and zero bytes 1 and
  // 3 (a negative index produces a zero byte).
  const __m256i kCstShuffle = _mm256_set_epi8(
      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
  for (i = 0; i + 8 <= num_pixels; i += 8) {
    const __m256i in = _mm256_loadu_si256((const __m256i*)&src[i]);  // argb
    const __m256i in_0g0g = _mm256_shuffle_epi8(in, kCstShuffle);    // 0g0g
    const __m256i out = _mm256_add_epi8(in, in_0g0g);
    _mm256_storeu_si256((__m256i*)&dst[i], out);
  }
  // fallthrough and finish off with SSE.
  if (i != num_pixels) {
    VP8LAddGreenToBlueAndRed_SSE(src + i, num_pixels - i, dst + i);
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColorInverse_AVX2(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src,
|
||||
int num_pixels, uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 5.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
||||
const __m256i mults_rb =
|
||||
_mm256_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
|
||||
(CST(green_to_blue_) & 0xffff)));
|
||||
const __m256i mults_b2 = _mm256_set1_epi32(CST(red_to_blue_));
|
||||
#undef CST
|
||||
const __m256i mask_ag = _mm256_set1_epi32((int)0xff00ff00);
|
||||
const __m256i perm1 = _mm256_setr_epi8(
|
||||
-1, 1, -1, 1, -1, 5, -1, 5, -1, 9, -1, 9, -1, 13, -1, 13, -1, 17, -1, 17,
|
||||
-1, 21, -1, 21, -1, 25, -1, 25, -1, 29, -1, 29);
|
||||
const __m256i perm2 = _mm256_setr_epi8(
|
||||
-1, 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1, 18, -1,
|
||||
-1, -1, 22, -1, -1, -1, 26, -1, -1, -1, 30, -1, -1);
|
||||
int i;
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i A = _mm256_loadu_si256((const __m256i*)(src + i));
|
||||
const __m256i B = _mm256_shuffle_epi8(A, perm1); // argb -> g0g0
|
||||
const __m256i C = _mm256_mulhi_epi16(B, mults_rb);
|
||||
const __m256i D = _mm256_add_epi8(A, C);
|
||||
const __m256i E = _mm256_shuffle_epi8(D, perm2);
|
||||
const __m256i F = _mm256_mulhi_epi16(E, mults_b2);
|
||||
const __m256i G = _mm256_add_epi8(D, F);
|
||||
const __m256i out = _mm256_blendv_epi8(G, A, mask_ag);
|
||||
_mm256_storeu_si256((__m256i*)&dst[i], out);
|
||||
}
|
||||
// Fall-back to SSE-version for left-overs.
|
||||
if (i != num_pixels) {
|
||||
VP8LTransformColorInverse_SSE(m, src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
// Converts num_pixels 32-bit pixels from src[] to 4 bytes each in dst[],
// swapping bytes 0 and 2 of every pixel (bytes 1 and 3 stay in place).
// Pixels are processed eight at a time; any remainder is delegated to
// VP8LConvertBGRAToRGBA_SSE.
static void ConvertBGRAToRGBA_AVX2(const uint32_t* WEBP_RESTRICT src,
                                   int num_pixels, uint8_t* WEBP_RESTRICT dst) {
  const __m256i* in = (const __m256i*)src;
  __m256i* out = (__m256i*)dst;
  // Loop-invariant byte permutation; the same pattern is used for both 128-bit
  // lanes.
  const __m256i kShuffle =
      _mm256_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2,
                      15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
  while (num_pixels >= 8) {
    const __m256i A = _mm256_loadu_si256(in++);
    const __m256i B = _mm256_shuffle_epi8(A, kShuffle);
    _mm256_storeu_si256(out++, B);
    num_pixels -= 8;
  }
  // left-overs
  if (num_pixels > 0) {
    VP8LConvertBGRAToRGBA_SSE((const uint32_t*)in, num_pixels, (uint8_t*)out);
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8LDspInitAVX2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitAVX2(void) {
|
||||
VP8LPredictorsAdd[0] = PredictorAdd0_AVX2;
|
||||
VP8LPredictorsAdd[1] = PredictorAdd1_AVX2;
|
||||
VP8LPredictorsAdd[2] = PredictorAdd2_AVX2;
|
||||
VP8LPredictorsAdd[3] = PredictorAdd3_AVX2;
|
||||
VP8LPredictorsAdd[4] = PredictorAdd4_AVX2;
|
||||
VP8LPredictorsAdd[8] = PredictorAdd8_AVX2;
|
||||
VP8LPredictorsAdd[9] = PredictorAdd9_AVX2;
|
||||
VP8LPredictorsAdd[10] = PredictorAdd10_AVX2;
|
||||
VP8LPredictorsAdd[11] = PredictorAdd11_AVX2;
|
||||
VP8LPredictorsAdd[12] = PredictorAdd12_AVX2;
|
||||
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_AVX2;
|
||||
VP8LTransformColorInverse = TransformColorInverse_AVX2;
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_AVX2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_AVX2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8LDspInitAVX2)
|
||||
|
||||
#endif // WEBP_USE_AVX2
|
@ -73,23 +73,44 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
|
||||
// Keeping a high threshold for now.
|
||||
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
||||
#define APPROX_LOG_MAX 4096
|
||||
// VP8LFastLog2 and VP8LFastSLog2 are used on elements from image histograms.
|
||||
// The histogram values cannot exceed the maximum number of pixels, which
|
||||
// is (1 << 14) * (1 << 14). Therefore S * log(S) < (1 << 33).
|
||||
// No more than 32 bits of precision should be chosen.
|
||||
// To match the original float implementation, 23 bits of precision are used.
|
||||
#define LOG_2_PRECISION_BITS 23
|
||||
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||
// LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS)
|
||||
#define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500
|
||||
#define LOG_2_RECIPROCAL_FIXED ((uint64_t)12102203)
|
||||
#define LOG_LOOKUP_IDX_MAX 256
|
||||
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
|
||||
extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
extern const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v);
|
||||
typedef uint64_t (*VP8LFastSLog2SlowFunc)(uint32_t v);
|
||||
|
||||
extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||
|
||||
static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
|
||||
static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) {
|
||||
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
|
||||
}
|
||||
// Fast calculation of v * log2(v) for integer input.
|
||||
static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
|
||||
static WEBP_INLINE uint64_t VP8LFastSLog2(uint32_t v) {
|
||||
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
|
||||
}
|
||||
|
||||
// Returns v / 2^shift rounded to the nearest integer, with halves rounded up.
// 'shift' must be smaller than 64. The rounding bit is taken from the shifted
// value rather than added to 'v' first, so the result is exact even when 'v'
// is close to the maximum of uint64_t.
static WEBP_INLINE uint64_t RightShiftRound(uint64_t v, uint32_t shift) {
  if (shift == 0) return v;
  // Bit (shift - 1) of v tells whether the discarded fraction is >= 1/2.
  return (v >> shift) + ((v >> (shift - 1)) & 1);
}
|
||||
|
||||
// Returns a / b rounded to the nearest integer, with halves rounded away from
// zero. Half of 'b' is added to 'a' when both have the same sign and
// subtracted otherwise, before the truncating division. 'b' must be non-zero,
// and the intermediate a +/- b / 2 must fit in int64_t.
static WEBP_INLINE int64_t DivRound(int64_t a, int64_t b) {
  return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b);
}
|
||||
|
||||
// Largest values representable by int64_t and uint64_t respectively.
#define WEBP_INT64_MAX ((int64_t)((1ull << 63) - 1))
#define WEBP_UINT64_MAX (~0ull)
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
@ -173,15 +194,15 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
|
||||
|
||||
// The predictor is added to the output pixel (which
|
||||
// is therefore considered as a residual) to get the final prediction.
|
||||
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
|
||||
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
|
||||
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -13,214 +13,137 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// lookup table for small values of log2(int)
|
||||
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0.0000000000000000f, 0.0000000000000000f,
|
||||
1.0000000000000000f, 1.5849625007211560f,
|
||||
2.0000000000000000f, 2.3219280948873621f,
|
||||
2.5849625007211560f, 2.8073549220576041f,
|
||||
3.0000000000000000f, 3.1699250014423121f,
|
||||
3.3219280948873621f, 3.4594316186372973f,
|
||||
3.5849625007211560f, 3.7004397181410921f,
|
||||
3.8073549220576041f, 3.9068905956085187f,
|
||||
4.0000000000000000f, 4.0874628412503390f,
|
||||
4.1699250014423121f, 4.2479275134435852f,
|
||||
4.3219280948873626f, 4.3923174227787606f,
|
||||
4.4594316186372973f, 4.5235619560570130f,
|
||||
4.5849625007211560f, 4.6438561897747243f,
|
||||
4.7004397181410917f, 4.7548875021634682f,
|
||||
4.8073549220576037f, 4.8579809951275718f,
|
||||
4.9068905956085187f, 4.9541963103868749f,
|
||||
5.0000000000000000f, 5.0443941193584533f,
|
||||
5.0874628412503390f, 5.1292830169449663f,
|
||||
5.1699250014423121f, 5.2094533656289501f,
|
||||
5.2479275134435852f, 5.2854022188622487f,
|
||||
5.3219280948873626f, 5.3575520046180837f,
|
||||
5.3923174227787606f, 5.4262647547020979f,
|
||||
5.4594316186372973f, 5.4918530963296747f,
|
||||
5.5235619560570130f, 5.5545888516776376f,
|
||||
5.5849625007211560f, 5.6147098441152083f,
|
||||
5.6438561897747243f, 5.6724253419714951f,
|
||||
5.7004397181410917f, 5.7279204545631987f,
|
||||
5.7548875021634682f, 5.7813597135246599f,
|
||||
5.8073549220576037f, 5.8328900141647412f,
|
||||
5.8579809951275718f, 5.8826430493618415f,
|
||||
5.9068905956085187f, 5.9307373375628866f,
|
||||
5.9541963103868749f, 5.9772799234999167f,
|
||||
6.0000000000000000f, 6.0223678130284543f,
|
||||
6.0443941193584533f, 6.0660891904577720f,
|
||||
6.0874628412503390f, 6.1085244567781691f,
|
||||
6.1292830169449663f, 6.1497471195046822f,
|
||||
6.1699250014423121f, 6.1898245588800175f,
|
||||
6.2094533656289501f, 6.2288186904958804f,
|
||||
6.2479275134435852f, 6.2667865406949010f,
|
||||
6.2854022188622487f, 6.3037807481771030f,
|
||||
6.3219280948873626f, 6.3398500028846243f,
|
||||
6.3575520046180837f, 6.3750394313469245f,
|
||||
6.3923174227787606f, 6.4093909361377017f,
|
||||
6.4262647547020979f, 6.4429434958487279f,
|
||||
6.4594316186372973f, 6.4757334309663976f,
|
||||
6.4918530963296747f, 6.5077946401986963f,
|
||||
6.5235619560570130f, 6.5391588111080309f,
|
||||
6.5545888516776376f, 6.5698556083309478f,
|
||||
6.5849625007211560f, 6.5999128421871278f,
|
||||
6.6147098441152083f, 6.6293566200796094f,
|
||||
6.6438561897747243f, 6.6582114827517946f,
|
||||
6.6724253419714951f, 6.6865005271832185f,
|
||||
6.7004397181410917f, 6.7142455176661224f,
|
||||
6.7279204545631987f, 6.7414669864011464f,
|
||||
6.7548875021634682f, 6.7681843247769259f,
|
||||
6.7813597135246599f, 6.7944158663501061f,
|
||||
6.8073549220576037f, 6.8201789624151878f,
|
||||
6.8328900141647412f, 6.8454900509443747f,
|
||||
6.8579809951275718f, 6.8703647195834047f,
|
||||
6.8826430493618415f, 6.8948177633079437f,
|
||||
6.9068905956085187f, 6.9188632372745946f,
|
||||
6.9307373375628866f, 6.9425145053392398f,
|
||||
6.9541963103868749f, 6.9657842846620869f,
|
||||
6.9772799234999167f, 6.9886846867721654f,
|
||||
7.0000000000000000f, 7.0112272554232539f,
|
||||
7.0223678130284543f, 7.0334230015374501f,
|
||||
7.0443941193584533f, 7.0552824355011898f,
|
||||
7.0660891904577720f, 7.0768155970508308f,
|
||||
7.0874628412503390f, 7.0980320829605263f,
|
||||
7.1085244567781691f, 7.1189410727235076f,
|
||||
7.1292830169449663f, 7.1395513523987936f,
|
||||
7.1497471195046822f, 7.1598713367783890f,
|
||||
7.1699250014423121f, 7.1799090900149344f,
|
||||
7.1898245588800175f, 7.1996723448363644f,
|
||||
7.2094533656289501f, 7.2191685204621611f,
|
||||
7.2288186904958804f, 7.2384047393250785f,
|
||||
7.2479275134435852f, 7.2573878426926521f,
|
||||
7.2667865406949010f, 7.2761244052742375f,
|
||||
7.2854022188622487f, 7.2946207488916270f,
|
||||
7.3037807481771030f, 7.3128829552843557f,
|
||||
7.3219280948873626f, 7.3309168781146167f,
|
||||
7.3398500028846243f, 7.3487281542310771f,
|
||||
7.3575520046180837f, 7.3663222142458160f,
|
||||
7.3750394313469245f, 7.3837042924740519f,
|
||||
7.3923174227787606f, 7.4008794362821843f,
|
||||
7.4093909361377017f, 7.4178525148858982f,
|
||||
7.4262647547020979f, 7.4346282276367245f,
|
||||
7.4429434958487279f, 7.4512111118323289f,
|
||||
7.4594316186372973f, 7.4676055500829976f,
|
||||
7.4757334309663976f, 7.4838157772642563f,
|
||||
7.4918530963296747f, 7.4998458870832056f,
|
||||
7.5077946401986963f, 7.5156998382840427f,
|
||||
7.5235619560570130f, 7.5313814605163118f,
|
||||
7.5391588111080309f, 7.5468944598876364f,
|
||||
7.5545888516776376f, 7.5622424242210728f,
|
||||
7.5698556083309478f, 7.5774288280357486f,
|
||||
7.5849625007211560f, 7.5924570372680806f,
|
||||
7.5999128421871278f, 7.6073303137496104f,
|
||||
7.6147098441152083f, 7.6220518194563764f,
|
||||
7.6293566200796094f, 7.6366246205436487f,
|
||||
7.6438561897747243f, 7.6510516911789281f,
|
||||
7.6582114827517946f, 7.6653359171851764f,
|
||||
7.6724253419714951f, 7.6794800995054464f,
|
||||
7.6865005271832185f, 7.6934869574993252f,
|
||||
7.7004397181410917f, 7.7073591320808825f,
|
||||
7.7142455176661224f, 7.7210991887071855f,
|
||||
7.7279204545631987f, 7.7347096202258383f,
|
||||
7.7414669864011464f, 7.7481928495894605f,
|
||||
7.7548875021634682f, 7.7615512324444795f,
|
||||
7.7681843247769259f, 7.7747870596011736f,
|
||||
7.7813597135246599f, 7.7879025593914317f,
|
||||
7.7944158663501061f, 7.8008998999203047f,
|
||||
7.8073549220576037f, 7.8137811912170374f,
|
||||
7.8201789624151878f, 7.8265484872909150f,
|
||||
7.8328900141647412f, 7.8392037880969436f,
|
||||
7.8454900509443747f, 7.8517490414160571f,
|
||||
7.8579809951275718f, 7.8641861446542797f,
|
||||
7.8703647195834047f, 7.8765169465649993f,
|
||||
7.8826430493618415f, 7.8887432488982591f,
|
||||
7.8948177633079437f, 7.9008668079807486f,
|
||||
7.9068905956085187f, 7.9128893362299619f,
|
||||
7.9188632372745946f, 7.9248125036057812f,
|
||||
7.9307373375628866f, 7.9366379390025709f,
|
||||
7.9425145053392398f, 7.9483672315846778f,
|
||||
7.9541963103868749f, 7.9600019320680805f,
|
||||
7.9657842846620869f, 7.9715435539507719f,
|
||||
7.9772799234999167f, 7.9829935746943103f,
|
||||
7.9886846867721654f, 7.9943534368588577f
|
||||
// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
|
||||
// Obtained in Python with:
|
||||
// a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)]
|
||||
// print(',\n'.join([' '+','.join(v)
|
||||
// for v in batched([i.rjust(9) for i in a],7)]))
|
||||
const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0, 0, 8388608, 13295629, 16777216, 19477745, 21684237,
|
||||
23549800, 25165824, 26591258, 27866353, 29019816, 30072845, 31041538,
|
||||
31938408, 32773374, 33554432, 34288123, 34979866, 35634199, 36254961,
|
||||
36845429, 37408424, 37946388, 38461453, 38955489, 39430146, 39886887,
|
||||
40327016, 40751698, 41161982, 41558811, 41943040, 42315445, 42676731,
|
||||
43027545, 43368474, 43700062, 44022807, 44337167, 44643569, 44942404,
|
||||
45234037, 45518808, 45797032, 46069003, 46334996, 46595268, 46850061,
|
||||
47099600, 47344097, 47583753, 47818754, 48049279, 48275495, 48497560,
|
||||
48715624, 48929828, 49140306, 49347187, 49550590, 49750631, 49947419,
|
||||
50141058, 50331648, 50519283, 50704053, 50886044, 51065339, 51242017,
|
||||
51416153, 51587818, 51757082, 51924012, 52088670, 52251118, 52411415,
|
||||
52569616, 52725775, 52879946, 53032177, 53182516, 53331012, 53477707,
|
||||
53622645, 53765868, 53907416, 54047327, 54185640, 54322389, 54457611,
|
||||
54591338, 54723604, 54854440, 54983876, 55111943, 55238669, 55364082,
|
||||
55488208, 55611074, 55732705, 55853126, 55972361, 56090432, 56207362,
|
||||
56323174, 56437887, 56551524, 56664103, 56775645, 56886168, 56995691,
|
||||
57104232, 57211808, 57318436, 57424133, 57528914, 57632796, 57735795,
|
||||
57837923, 57939198, 58039632, 58139239, 58238033, 58336027, 58433234,
|
||||
58529666, 58625336, 58720256, 58814437, 58907891, 59000628, 59092661,
|
||||
59183999, 59274652, 59364632, 59453947, 59542609, 59630625, 59718006,
|
||||
59804761, 59890898, 59976426, 60061354, 60145690, 60229443, 60312620,
|
||||
60395229, 60477278, 60558775, 60639726, 60720140, 60800023, 60879382,
|
||||
60958224, 61036555, 61114383, 61191714, 61268554, 61344908, 61420785,
|
||||
61496188, 61571124, 61645600, 61719620, 61793189, 61866315, 61939001,
|
||||
62011253, 62083076, 62154476, 62225457, 62296024, 62366182, 62435935,
|
||||
62505289, 62574248, 62642816, 62710997, 62778797, 62846219, 62913267,
|
||||
62979946, 63046260, 63112212, 63177807, 63243048, 63307939, 63372484,
|
||||
63436687, 63500551, 63564080, 63627277, 63690146, 63752690, 63814912,
|
||||
63876816, 63938405, 63999682, 64060650, 64121313, 64181673, 64241734,
|
||||
64301498, 64360969, 64420148, 64479040, 64537646, 64595970, 64654014,
|
||||
64711782, 64769274, 64826495, 64883447, 64940132, 64996553, 65052711,
|
||||
65108611, 65164253, 65219641, 65274776, 65329662, 65384299, 65438691,
|
||||
65492840, 65546747, 65600416, 65653847, 65707044, 65760008, 65812741,
|
||||
65865245, 65917522, 65969575, 66021404, 66073013, 66124403, 66175575,
|
||||
66226531, 66277275, 66327806, 66378127, 66428240, 66478146, 66527847,
|
||||
66577345, 66626641, 66675737, 66724635, 66773336, 66821842, 66870154,
|
||||
66918274, 66966204, 67013944, 67061497
|
||||
};
|
||||
|
||||
const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
|
||||
8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
|
||||
24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
|
||||
43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
|
||||
64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
|
||||
86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
|
||||
110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
|
||||
134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
|
||||
160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
|
||||
186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
|
||||
212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
|
||||
240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
|
||||
268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
|
||||
296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
|
||||
325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
|
||||
354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
|
||||
384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
|
||||
413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
|
||||
444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
|
||||
474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
|
||||
505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
|
||||
536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
|
||||
568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
|
||||
600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
|
||||
632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
|
||||
664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
|
||||
696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
|
||||
729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
|
||||
762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
|
||||
795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
|
||||
828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
|
||||
862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
|
||||
896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
|
||||
929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
|
||||
963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
|
||||
998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
|
||||
1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
|
||||
1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
|
||||
1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
|
||||
1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
|
||||
1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
|
||||
1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
|
||||
1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
|
||||
1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
|
||||
1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
|
||||
1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
|
||||
1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
|
||||
1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
|
||||
1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
|
||||
1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
|
||||
1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
|
||||
1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
|
||||
1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
|
||||
1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
|
||||
1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
|
||||
1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
|
||||
1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
|
||||
1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
|
||||
1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
|
||||
1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
|
||||
1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
|
||||
1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
|
||||
1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
|
||||
2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
|
||||
// lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS).
|
||||
// Obtained in Python with:
|
||||
// a=[ "%d"%i if i<(1<<32) else "%dull"%i
|
||||
// for i in [ round((1<<LOG_2_PRECISION_BITS)*math.log2(i)*i) if i
|
||||
// else 0 for i in range(256)]]
|
||||
// print(',\n '.join([','.join(v) for v in batched([i.rjust(15)
|
||||
// for i in a],4)]))
|
||||
const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0, 0, 16777216, 39886887,
|
||||
67108864, 97388723, 130105423, 164848600,
|
||||
201326592, 239321324, 278663526, 319217973,
|
||||
360874141, 403539997, 447137711, 491600606,
|
||||
536870912, 582898099, 629637592, 677049776,
|
||||
725099212, 773754010, 822985323, 872766924,
|
||||
923074875, 973887230, 1025183802, 1076945958,
|
||||
1129156447, 1181799249, 1234859451, 1288323135,
|
||||
1342177280, 1396409681, 1451008871, 1505964059,
|
||||
1561265072, 1616902301, 1672866655, 1729149526,
|
||||
1785742744, 1842638548, 1899829557, 1957308741,
|
||||
2015069397, 2073105127, 2131409817, 2189977618ull,
|
||||
2248802933ull, 2307880396ull, 2367204859ull, 2426771383ull,
|
||||
2486575220ull, 2546611805ull, 2606876748ull, 2667365819ull,
|
||||
2728074942ull, 2789000187ull, 2850137762ull, 2911484006ull,
|
||||
2973035382ull, 3034788471ull, 3096739966ull, 3158886666ull,
|
||||
3221225472ull, 3283753383ull, 3346467489ull, 3409364969ull,
|
||||
3472443085ull, 3535699182ull, 3599130679ull, 3662735070ull,
|
||||
3726509920ull, 3790452862ull, 3854561593ull, 3918833872ull,
|
||||
3983267519ull, 4047860410ull, 4112610476ull, 4177515704ull,
|
||||
4242574127ull, 4307783833ull, 4373142952ull, 4438649662ull,
|
||||
4504302186ull, 4570098787ull, 4636037770ull, 4702117480ull,
|
||||
4768336298ull, 4834692645ull, 4901184974ull, 4967811774ull,
|
||||
5034571569ull, 5101462912ull, 5168484389ull, 5235634615ull,
|
||||
5302912235ull, 5370315922ull, 5437844376ull, 5505496324ull,
|
||||
5573270518ull, 5641165737ull, 5709180782ull, 5777314477ull,
|
||||
5845565671ull, 5913933235ull, 5982416059ull, 6051013057ull,
|
||||
6119723161ull, 6188545324ull, 6257478518ull, 6326521733ull,
|
||||
6395673979ull, 6464934282ull, 6534301685ull, 6603775250ull,
|
||||
6673354052ull, 6743037185ull, 6812823756ull, 6882712890ull,
|
||||
6952703725ull, 7022795412ull, 7092987118ull, 7163278025ull,
|
||||
7233667324ull, 7304154222ull, 7374737939ull, 7445417707ull,
|
||||
7516192768ull, 7587062379ull, 7658025806ull, 7729082328ull,
|
||||
7800231234ull, 7871471825ull, 7942803410ull, 8014225311ull,
|
||||
8085736859ull, 8157337394ull, 8229026267ull, 8300802839ull,
|
||||
8372666477ull, 8444616560ull, 8516652476ull, 8588773618ull,
|
||||
8660979393ull, 8733269211ull, 8805642493ull, 8878098667ull,
|
||||
8950637170ull, 9023257446ull, 9095958945ull, 9168741125ull,
|
||||
9241603454ull, 9314545403ull, 9387566451ull, 9460666086ull,
|
||||
9533843800ull, 9607099093ull, 9680431471ull, 9753840445ull,
|
||||
9827325535ull, 9900886263ull, 9974522161ull, 10048232765ull,
|
||||
10122017615ull, 10195876260ull, 10269808253ull, 10343813150ull,
|
||||
10417890516ull, 10492039919ull, 10566260934ull, 10640553138ull,
|
||||
10714916116ull, 10789349456ull, 10863852751ull, 10938425600ull,
|
||||
11013067604ull, 11087778372ull, 11162557513ull, 11237404645ull,
|
||||
11312319387ull, 11387301364ull, 11462350205ull, 11537465541ull,
|
||||
11612647010ull, 11687894253ull, 11763206912ull, 11838584638ull,
|
||||
11914027082ull, 11989533899ull, 12065104750ull, 12140739296ull,
|
||||
12216437206ull, 12292198148ull, 12368021795ull, 12443907826ull,
|
||||
12519855920ull, 12595865759ull, 12671937032ull, 12748069427ull,
|
||||
12824262637ull, 12900516358ull, 12976830290ull, 13053204134ull,
|
||||
13129637595ull, 13206130381ull, 13282682202ull, 13359292772ull,
|
||||
13435961806ull, 13512689025ull, 13589474149ull, 13666316903ull,
|
||||
13743217014ull, 13820174211ull, 13897188225ull, 13974258793ull,
|
||||
14051385649ull, 14128568535ull, 14205807192ull, 14283101363ull,
|
||||
14360450796ull, 14437855239ull, 14515314443ull, 14592828162ull,
|
||||
14670396151ull, 14748018167ull, 14825693972ull, 14903423326ull,
|
||||
14981205995ull, 15059041743ull, 15136930339ull, 15214871554ull,
|
||||
15292865160ull, 15370910930ull, 15449008641ull, 15527158071ull,
|
||||
15605359001ull, 15683611210ull, 15761914485ull, 15840268608ull,
|
||||
15918673369ull, 15997128556ull, 16075633960ull, 16154189373ull,
|
||||
16232794589ull, 16311449405ull, 16390153617ull, 16468907026ull,
|
||||
16547709431ull, 16626560636ull, 16705460444ull, 16784408661ull,
|
||||
16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull
|
||||
};
|
||||
|
||||
const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
@ -326,23 +249,19 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
|
||||
};
|
||||
|
||||
static float FastSLog2Slow_C(uint32_t v) {
|
||||
static uint64_t FastSLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
const uint64_t orig_v = v;
|
||||
uint64_t correction;
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint64_t log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
const uint32_t orig_v = v;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint64_t log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
const uint32_t orig_v = v;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
@ -354,45 +273,43 @@ static float FastSLog2Slow_C(uint32_t v) {
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||
// LOG_2_RECIPROCAL ~ 23/16
|
||||
correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||
return v_f * (kLog2Table[v] + log_cnt) + correction;
|
||||
correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
|
||||
return orig_v * (kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS)) +
|
||||
correction;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||
return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
static float FastLog2Slow_C(uint32_t v) {
|
||||
static uint32_t FastLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
const uint32_t orig_v = v;
|
||||
uint32_t log_2;
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
const uint32_t orig_v = v;
|
||||
double log_2;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint32_t log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
const uint32_t orig_v = v;
|
||||
double log_2;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
#endif
|
||||
log_2 = kLog2Table[v] + log_cnt;
|
||||
log_2 = kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS);
|
||||
if (orig_v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
// for large values of 'v'.
|
||||
const int correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||
log_2 += (double)correction / orig_v;
|
||||
const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
|
||||
log_2 += (uint32_t)DivRound(correction, orig_v);
|
||||
}
|
||||
return (float)log_2;
|
||||
return log_2;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||
return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -400,37 +317,53 @@ static float FastLog2Slow_C(uint32_t v) {
|
||||
// Methods to calculate Entropy (Shannon).
|
||||
|
||||
// Compute the combined Shannon's entropy for distribution {X} and {X+Y}
|
||||
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
static uint64_t CombinedShannonEntropy_C(const uint32_t X[256],
|
||||
const uint32_t Y[256]) {
|
||||
int i;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0, sumXY = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
const int x = X[i];
|
||||
const uint32_t x = X[i];
|
||||
if (x != 0) {
|
||||
const int xy = x + Y[i];
|
||||
const uint32_t xy = x + Y[i];
|
||||
sumX += x;
|
||||
retval -= VP8LFastSLog2(x);
|
||||
retval += VP8LFastSLog2(x);
|
||||
sumXY += xy;
|
||||
retval -= VP8LFastSLog2(xy);
|
||||
retval += VP8LFastSLog2(xy);
|
||||
} else if (Y[i] != 0) {
|
||||
sumXY += Y[i];
|
||||
retval -= VP8LFastSLog2(Y[i]);
|
||||
retval += VP8LFastSLog2(Y[i]);
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Returns VP8LFastSLog2(sum of X[0..n-1]) minus the sum of VP8LFastSLog2(X[i])
// over the non-zero entries of X.
// NOTE(review): VP8LFastSLog2(v) is presumably the fixed-point v * log2(v)
// (cf. FastSLog2Slow_C) -- confirm against its declaration.
//   X: histogram counts, 'n' entries.
//   n: number of entries to read from X.
static uint64_t ShannonEntropy_C(const uint32_t* X, int n) {
  int i;
  uint64_t retval = 0;
  uint32_t sumX = 0;
  for (i = 0; i < n; ++i) {
    // Keep the count unsigned, like the array it comes from and like
    // CombinedShannonEntropy_C does, instead of round-tripping through 'int'.
    const uint32_t x = X[i];
    if (x != 0) {
      sumX += x;
      retval += VP8LFastSLog2(x);
    }
  }
  retval = VP8LFastSLog2(sumX) - retval;
  return retval;
}
|
||||
|
||||
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
entropy->entropy = 0.;
|
||||
entropy->entropy = 0;
|
||||
entropy->sum = 0;
|
||||
entropy->nonzeros = 0;
|
||||
entropy->max_val = 0;
|
||||
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
|
||||
}
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy) {
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
|
||||
int i;
|
||||
|
||||
VP8LBitEntropyInit(entropy);
|
||||
@ -440,18 +373,20 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
entropy->sum += array[i];
|
||||
entropy->nonzero_code = i;
|
||||
++entropy->nonzeros;
|
||||
entropy->entropy -= VP8LFastSLog2(array[i]);
|
||||
entropy->entropy += VP8LFastSLog2(array[i]);
|
||||
if (entropy->max_val < array[i]) {
|
||||
entropy->max_val = array[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
entropy->entropy += VP8LFastSLog2(entropy->sum);
|
||||
entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
const int streak = i - *i_prev;
|
||||
|
||||
// Gather info for the bit entropy.
|
||||
@ -459,7 +394,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
bit_entropy->sum += (*val_prev) * streak;
|
||||
bit_entropy->nonzeros += streak;
|
||||
bit_entropy->nonzero_code = *i_prev;
|
||||
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
|
||||
bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
|
||||
if (bit_entropy->max_val < *val_prev) {
|
||||
bit_entropy->max_val = *val_prev;
|
||||
}
|
||||
@ -473,9 +408,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_C(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -491,14 +427,13 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_C(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -514,7 +449,7 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -538,8 +473,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
|
||||
return (int8_t)(v & 0xff);
|
||||
}
|
||||
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
@ -575,9 +510,10 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
for (x = 0; x < tile_width; ++x) {
|
||||
@ -587,10 +523,11 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
for (x = 0; x < tile_width; ++x) {
|
||||
@ -614,8 +551,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
if (xbits > 0) {
|
||||
const int bit_depth = 1 << (3 - xbits);
|
||||
@ -646,40 +583,29 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
int length) {
|
||||
int i;
|
||||
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
|
||||
assert(length % 2 == 0);
|
||||
for (i = 2; i < length / 2 - 1; ++i) {
|
||||
const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
|
||||
const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
|
||||
cost += i * (xy0 + xy1);
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] += a[i];
|
||||
}
|
||||
|
||||
#define ADD(X, ARG, LEN) do { \
|
||||
if (a->is_used_[X]) { \
|
||||
if (b->is_used_[X]) { \
|
||||
if (a->is_used[X]) { \
|
||||
if (b->is_used[X]) { \
|
||||
VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN)); \
|
||||
} else { \
|
||||
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
|
||||
} \
|
||||
} else if (b->is_used_[X]) { \
|
||||
} else if (b->is_used[X]) { \
|
||||
memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0])); \
|
||||
} else { \
|
||||
memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0])); \
|
||||
@ -687,8 +613,8 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
} while (0)
|
||||
|
||||
#define ADD_EQ(X, ARG, LEN) do { \
|
||||
if (a->is_used_[X]) { \
|
||||
if (out->is_used_[X]) { \
|
||||
if (a->is_used[X]) { \
|
||||
if (out->is_used[X]) { \
|
||||
VP8LAddVectorEq(a->ARG, out->ARG, (LEN)); \
|
||||
} else { \
|
||||
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
|
||||
@ -696,28 +622,29 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b, VP8LHistogram* const out) {
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out) {
|
||||
int i;
|
||||
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits);
|
||||
assert(a->palette_code_bits == b->palette_code_bits);
|
||||
|
||||
if (b != out) {
|
||||
ADD(0, literal_, literal_size);
|
||||
ADD(1, red_, NUM_LITERAL_CODES);
|
||||
ADD(2, blue_, NUM_LITERAL_CODES);
|
||||
ADD(3, alpha_, NUM_LITERAL_CODES);
|
||||
ADD(4, distance_, NUM_DISTANCE_CODES);
|
||||
ADD(0, literal, literal_size);
|
||||
ADD(1, red, NUM_LITERAL_CODES);
|
||||
ADD(2, blue, NUM_LITERAL_CODES);
|
||||
ADD(3, alpha, NUM_LITERAL_CODES);
|
||||
ADD(4, distance, NUM_DISTANCE_CODES);
|
||||
for (i = 0; i < 5; ++i) {
|
||||
out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
|
||||
out->is_used[i] = (a->is_used[i] | b->is_used[i]);
|
||||
}
|
||||
} else {
|
||||
ADD_EQ(0, literal_, literal_size);
|
||||
ADD_EQ(1, red_, NUM_LITERAL_CODES);
|
||||
ADD_EQ(2, blue_, NUM_LITERAL_CODES);
|
||||
ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
|
||||
ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
|
||||
for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
|
||||
ADD_EQ(0, literal, literal_size);
|
||||
ADD_EQ(1, red, NUM_LITERAL_CODES);
|
||||
ADD_EQ(2, blue, NUM_LITERAL_CODES);
|
||||
ADD_EQ(3, alpha, NUM_LITERAL_CODES);
|
||||
ADD_EQ(4, distance, NUM_DISTANCE_CODES);
|
||||
for (i = 0; i < 5; ++i) out->is_used[i] |= a->is_used[i];
|
||||
}
|
||||
}
|
||||
#undef ADD
|
||||
@ -727,14 +654,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
// Image transforms.
|
||||
|
||||
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
|
||||
(void)upper;
|
||||
}
|
||||
|
||||
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
|
||||
(void)upper;
|
||||
@ -745,7 +672,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
|
||||
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
||||
const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
@ -771,18 +699,22 @@ GENERATE_PREDICTOR_SUB(13)
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
|
||||
|
||||
VP8LTransformColorFunc VP8LTransformColor;
|
||||
VP8LTransformColorFunc VP8LTransformColor_SSE;
|
||||
|
||||
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
|
||||
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
|
||||
|
||||
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||
|
||||
VP8LCostFunc VP8LExtraCost;
|
||||
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
@ -792,13 +724,16 @@ VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
|
||||
VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
|
||||
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8LEncDspInitSSE2(void);
|
||||
extern void VP8LEncDspInitSSE41(void);
|
||||
extern void VP8LEncDspInitAVX2(void);
|
||||
extern void VP8LEncDspInitNEON(void);
|
||||
extern void VP8LEncDspInitMIPS32(void);
|
||||
extern void VP8LEncDspInitMIPSdspR2(void);
|
||||
@ -820,8 +755,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
VP8LFastSLog2Slow = FastSLog2Slow_C;
|
||||
|
||||
VP8LExtraCost = ExtraCost_C;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_C;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
|
||||
VP8LShannonEntropy = ShannonEntropy_C;
|
||||
|
||||
VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
|
||||
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
|
||||
@ -874,6 +809,11 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8LEncDspInitSSE41();
|
||||
#if defined(WEBP_HAVE_AVX2)
|
||||
if (VP8GetCPUInfo(kAVX2)) {
|
||||
VP8LEncDspInitAVX2();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -909,8 +849,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
assert(VP8LFastLog2Slow != NULL);
|
||||
assert(VP8LFastSLog2Slow != NULL);
|
||||
assert(VP8LExtraCost != NULL);
|
||||
assert(VP8LExtraCostCombined != NULL);
|
||||
assert(VP8LCombinedShannonEntropy != NULL);
|
||||
assert(VP8LShannonEntropy != NULL);
|
||||
assert(VP8LGetEntropyUnrefined != NULL);
|
||||
assert(VP8LGetCombinedEntropyUnrefined != NULL);
|
||||
assert(VP8LAddVector != NULL);
|
||||
|
733
src/dsp/lossless_enc_avx2.c
Normal file
733
src/dsp/lossless_enc_avx2.c
Normal file
@ -0,0 +1,733 @@
|
||||
// Copyright 2025 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// AVX2 variant of methods for lossless encoder
|
||||
//
|
||||
// Author: Vincent Rabaud (vrabaud@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_AVX2)
|
||||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
|
||||
// Subtracts, in place and modulo 256, the green byte of each pixel from its
// red and blue bytes. Eight pixels are handled per iteration; whatever is left
// is forwarded to VP8LSubtractGreenFromBlueAndRed_SSE.
//   argb_data:  pixels to transform, modified in place.
//   num_pixels: number of pixels in argb_data.
static void SubtractGreenFromBlueAndRed_AVX2(uint32_t* argb_data,
                                             int num_pixels) {
  // Byte shuffle copying byte 1 of every pixel (green) into bytes 0 and 2
  // (blue and red) and zeroing bytes 1 and 3, i.e. producing 0g0g.
  const __m256i green_to_rb = _mm256_set_epi8(
      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
  int pos = 0;
  while (pos + 8 <= num_pixels) {
    __m256i* const chunk = (__m256i*)&argb_data[pos];
    const __m256i argb = _mm256_loadu_si256(chunk);
    const __m256i green = _mm256_shuffle_epi8(argb, green_to_rb);  // 0g0g
    _mm256_storeu_si256(chunk, _mm256_sub_epi8(argb, green));
    pos += 8;
  }
  // Nothing left over: done.
  if (pos == num_pixels) return;
  // Otherwise finish the tail with the SSE implementation.
  VP8LSubtractGreenFromBlueAndRed_SSE(argb_data + pos, num_pixels - pos);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
// For sign-extended multiplying constants, pre-shifted by 5:
// CST_5b(X) moves the low 8 bits of X into the high byte of a 16-bit word,
// reinterprets that word as int16_t and shifts it right by 5. With an
// arithmetic right shift (implementation-defined in C for negative values)
// the result is the sign-extended 8-bit value of X multiplied by 8.
|
||||
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
|
||||
|
||||
// MK_CST_16(HI, LO) broadcasts to every 32-bit lane of a __m256i a word whose
// upper 16 bits are the low 16 bits of HI and whose lower 16 bits are the low
// 16 bits of LO.
#define MK_CST_16(HI, LO) \
|
||||
_mm256_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
// Applies the color transform described by 'm' to argb_data, in place:
// a green-derived delta is subtracted from red, and a green-derived plus a
// red-derived delta are subtracted from blue (all byte-wise, modulo 256).
// Alpha and green are left untouched. Eight pixels are processed per
// iteration; the remainder is forwarded to VP8LTransformColor_SSE.
//   m:          multipliers (green_to_red_, green_to_blue_, red_to_blue_).
//   argb_data:  pixels to transform, modified in place.
//   num_pixels: number of pixels in argb_data.
static void TransformColor_AVX2(const VP8LMultipliers* WEBP_RESTRICT const m,
                                uint32_t* WEBP_RESTRICT argb_data,
                                int num_pixels) {
  // Per 32-bit lane: upper half = green->red, lower half = green->blue.
  const __m256i green_mults =
      MK_CST_16(CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_));
  // Per 32-bit lane: upper half = red->blue, lower half = 0.
  const __m256i red_mults = MK_CST_16(CST_5b(m->red_to_blue_), 0);
  const __m256i keep_rb = _mm256_set1_epi32(0x00ff00ff);  // red-blue mask
  // Puts the green byte of each pixel in the high byte of both 16-bit halves.
  const __m256i green_to_hi_bytes = _mm256_set_epi8(
      29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13, -1,
      13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1, -1);
  int pos = 0;
  for (; pos + 8 <= num_pixels; pos += 8) {
    __m256i* const chunk = (__m256i*)&argb_data[pos];
    const __m256i argb = _mm256_loadu_si256(chunk);
    const __m256i g0g0 = _mm256_shuffle_epi8(argb, green_to_hi_bytes);
    // x dr x db1
    const __m256i from_green = _mm256_mulhi_epi16(g0g0, green_mults);
    // r 0 b 0
    const __m256i r0b0 = _mm256_slli_epi16(argb, 8);
    // x db2 0 0
    const __m256i from_red_hi = _mm256_mulhi_epi16(r0b0, red_mults);
    // 0 0 x db2
    const __m256i from_red = _mm256_srli_epi32(from_red_hi, 16);
    // x dr x db, then masked down to 0 dr 0 db
    const __m256i delta =
        _mm256_and_si256(_mm256_add_epi8(from_red, from_green), keep_rb);
    _mm256_storeu_si256(chunk, _mm256_sub_epi8(argb, delta));
  }
  // Finish the tail (fewer than 8 pixels) with the SSE implementation.
  if (pos != num_pixels) {
    VP8LTransformColor_SSE(m, argb_data + pos, num_pixels - pos);
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 16
|
||||
// Accumulates into 'histo' the histogram of the transformed blue byte of every
// pixel of a tile, for the given green_to_blue / red_to_blue multipliers.
// Each row is processed in groups of 8 pixels; the trailing (tile_width & 7)
// columns are forwarded to VP8LCollectColorBlueTransforms_SSE.
//   argb:        top-left pixel of the tile.
//   stride:      distance, in pixels, between two consecutive rows.
//   tile_width:  tile width in pixels.
//   tile_height: tile height in pixels.
//   histo:       counters indexed by a byte value; incremented, never reset.
static void CollectColorBlueTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
                                            int stride, int tile_width,
                                            int tile_height, int green_to_blue,
                                            int red_to_blue, uint32_t histo[]) {
  // Upper half multiplies red, lower half multiplies green. The extra 256 in
  // the upper half makes the product also carry the red value itself, so the
  // subtraction below cancels red out of that half.
  const __m256i mults =
      MK_CST_16(CST_5b(red_to_blue) + 256, CST_5b(green_to_blue));
  // Moves green to the high byte of the low 16-bit half and red to the high
  // byte of the upper 16-bit half of every pixel; the other bytes are zeroed.
  const __m256i green_red_to_hi = _mm256_setr_epi8(
      -1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14, -1, 17, -1, 18,
      -1, 21, -1, 22, -1, 25, -1, 26, -1, 29, -1, 30);
  const int left_over = tile_width & 7;

  if (tile_width >= 8) {
    int row;
    for (row = 0; row < tile_height; ++row) {
      const uint32_t* const line = argb + row * stride;
      uint8_t bytes[32];
      int x, k;
      __m256i pixels = _mm256_loadu_si256((const __m256i*)line);
      __m256i scaled =
          _mm256_mulhi_epi16(_mm256_shuffle_epi8(pixels, green_red_to_hi),
                             mults);
      __m256i diff = _mm256_sub_epi16(pixels, scaled);
      // Byte 0 of every pixel now holds the transformed blue value.
      __m256i blue = _mm256_add_epi16(_mm256_srli_epi32(diff, 16), diff);
      for (x = 8; x + 8 <= tile_width; x += 8) {
        // Load the next group before flushing the previous result.
        pixels = _mm256_loadu_si256((const __m256i*)(line + x));
        _mm256_storeu_si256((__m256i*)bytes, blue);
        for (k = 0; k < 32; k += 4) ++histo[bytes[k]];
        scaled =
            _mm256_mulhi_epi16(_mm256_shuffle_epi8(pixels, green_red_to_hi),
                               mults);
        diff = _mm256_sub_epi16(pixels, scaled);
        blue = _mm256_add_epi16(_mm256_srli_epi32(diff, 16), diff);
      }
      _mm256_storeu_si256((__m256i*)bytes, blue);
      for (k = 0; k < 32; k += 4) ++histo[bytes[k]];
    }
  }

  // Remaining columns on the right of the tile go through the SSE version.
  if (left_over > 0) {
    VP8LCollectColorBlueTransforms_SSE(argb + tile_width - left_over, stride,
                                       left_over, tile_height, green_to_blue,
                                       red_to_blue, histo);
  }
}
|
||||
|
||||
// Accumulates into 'histo' the histogram of the transformed red byte of every
// pixel of a tile, for the given green_to_red multiplier.
// Each row is processed in groups of 8 pixels; the trailing (tile_width & 7)
// columns are forwarded to VP8LCollectColorRedTransforms_SSE.
//   argb:        top-left pixel of the tile.
//   stride:      distance, in pixels, between two consecutive rows.
//   tile_width:  tile width in pixels.
//   tile_height: tile height in pixels.
//   histo:       counters indexed by a byte value; incremented, never reset.
static void CollectColorRedTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
                                           int stride, int tile_width,
                                           int tile_height, int green_to_red,
                                           uint32_t histo[]) {
  // Only the low 16-bit half of each lane carries a multiplier.
  const __m256i green_mult = MK_CST_16(0, CST_5b(green_to_red));
  const __m256i green_only = _mm256_set1_epi32(0x0000ff00);
  const int left_over = tile_width & 7;

  if (tile_width >= 8) {
    int row;
    for (row = 0; row < tile_height; ++row) {
      const uint32_t* const line = argb + row * stride;
      uint8_t bytes[32];
      int x, k;
      __m256i pixels = _mm256_loadu_si256((const __m256i*)line);
      __m256i delta =
          _mm256_madd_epi16(_mm256_and_si256(pixels, green_only), green_mult);
      // Byte 2 of every pixel now holds the transformed red value.
      __m256i red = _mm256_sub_epi16(pixels, delta);
      for (x = 8; x + 8 <= tile_width; x += 8) {
        // Load the next group before flushing the previous result.
        pixels = _mm256_loadu_si256((const __m256i*)(line + x));
        _mm256_storeu_si256((__m256i*)bytes, red);
        for (k = 2; k < 32; k += 4) ++histo[bytes[k]];
        delta =
            _mm256_madd_epi16(_mm256_and_si256(pixels, green_only), green_mult);
        red = _mm256_sub_epi16(pixels, delta);
      }
      _mm256_storeu_si256((__m256i*)bytes, red);
      for (k = 2; k < 32; k += 4) ++histo[bytes[k]];
    }
  }

  // Remaining columns on the right of the tile go through the SSE version.
  if (left_over > 0) {
    VP8LCollectColorRedTransforms_SSE(argb + tile_width - left_over, stride,
                                      left_over, tile_height, green_to_red,
                                      histo);
  }
}
|
||||
#undef SPAN
|
||||
#undef MK_CST_16
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm256_add_epi32).
|
||||
// But that's ok since the histogram values are less than 1<<28 (max picture
|
||||
// size).
|
||||
// Element-wise sum: out[i] = a[i] + b[i] for i in [0, size).
// Works on 32 elements at a time, then on an optional group of 16, then on an
// optional group of 8 or a short scalar tail.
//   a, b: input vectors of 'size' elements.
//   out:  destination vector of 'size' elements.
//   size: element count; must be at least 32 (asserted).
static void AddVector_AVX2(const uint32_t* WEBP_RESTRICT a,
                           const uint32_t* WEBP_RESTRICT b,
                           uint32_t* WEBP_RESTRICT out, int size) {
  const int unrolled_end = size & ~31;
  int pos = 0;
  int tail;
  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
  // 2). See the usage in VP8LHistogramAdd().
  assert(size >= 32);
  assert(size % 2 == 0);

  // Main part: 4 x 8 lanes per iteration. All loads precede the stores.
  do {
    const __m256i sum0 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 0]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 0]));
    const __m256i sum1 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 8]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 8]));
    const __m256i sum2 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 16]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 16]));
    const __m256i sum3 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 24]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 24]));
    _mm256_storeu_si256((__m256i*)&out[pos + 0], sum0);
    _mm256_storeu_si256((__m256i*)&out[pos + 8], sum1);
    _mm256_storeu_si256((__m256i*)&out[pos + 16], sum2);
    _mm256_storeu_si256((__m256i*)&out[pos + 24], sum3);
    pos += 32;
  } while (pos != unrolled_end);

  // Optional group of 16 elements.
  if ((size & 16) != 0) {
    const __m256i sum0 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 0]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 0]));
    const __m256i sum1 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 8]),
                         _mm256_loadu_si256((const __m256i*)&b[pos + 8]));
    _mm256_storeu_si256((__m256i*)&out[pos + 0], sum0);
    _mm256_storeu_si256((__m256i*)&out[pos + 8], sum1);
    pos += 16;
  }

  // Fewer than 16 elements remain: one vector if exactly 8, scalar otherwise.
  tail = size & 15;
  if (tail == 8) {
    const __m256i sum =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos]),
                         _mm256_loadu_si256((const __m256i*)&b[pos]));
    _mm256_storeu_si256((__m256i*)&out[pos], sum);
  } else {
    while (tail-- > 0) {
      out[pos] = a[pos] + b[pos];
      ++pos;
    }
  }
}
|
||||
|
||||
// In-place element-wise accumulation: out[i] += a[i] for i in [0, size).
// Works on 32 elements at a time, then on an optional group of 16, then on an
// optional group of 8 or a short scalar tail.
//   a:    vector of 'size' elements to add.
//   out:  accumulator vector of 'size' elements, updated in place.
//   size: element count; must be at least 32 (asserted).
static void AddVectorEq_AVX2(const uint32_t* WEBP_RESTRICT a,
                             uint32_t* WEBP_RESTRICT out, int size) {
  const int unrolled_end = size & ~31;
  int pos = 0;
  int tail;
  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
  // 2). See the usage in VP8LHistogramAdd().
  assert(size >= 32);
  assert(size % 2 == 0);

  // Main part: 4 x 8 lanes per iteration. All loads precede the stores.
  do {
    const __m256i sum0 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 0]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 0]));
    const __m256i sum1 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 8]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 8]));
    const __m256i sum2 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 16]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 16]));
    const __m256i sum3 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 24]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 24]));
    _mm256_storeu_si256((__m256i*)&out[pos + 0], sum0);
    _mm256_storeu_si256((__m256i*)&out[pos + 8], sum1);
    _mm256_storeu_si256((__m256i*)&out[pos + 16], sum2);
    _mm256_storeu_si256((__m256i*)&out[pos + 24], sum3);
    pos += 32;
  } while (pos != unrolled_end);

  // Optional group of 16 elements.
  if ((size & 16) != 0) {
    const __m256i sum0 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 0]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 0]));
    const __m256i sum1 =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos + 8]),
                         _mm256_loadu_si256((const __m256i*)&out[pos + 8]));
    _mm256_storeu_si256((__m256i*)&out[pos + 0], sum0);
    _mm256_storeu_si256((__m256i*)&out[pos + 8], sum1);
    pos += 16;
  }

  // Fewer than 16 elements remain: one vector if exactly 8, scalar otherwise.
  tail = size & 15;
  if (tail == 8) {
    const __m256i sum =
        _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)&a[pos]),
                         _mm256_loadu_si256((const __m256i*)&out[pos]));
    _mm256_storeu_si256((__m256i*)&out[pos], sum);
  } else {
    while (tail-- > 0) {
      out[pos] += a[pos];
      ++pos;
    }
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
|
||||
// Combined entropy term for the distributions {X} and {X+Y}:
//   VP8LFastSLog2(sum X) + VP8LFastSLog2(sum (X+Y))
//     - sum over i of [VP8LFastSLog2(X[i]) + VP8LFastSLog2(X[i] + Y[i])]
// where only non-zero entries contribute.
// The histograms are scanned 32 entries at a time: each group is narrowed to
// one byte per entry with signed saturation, turned into a "non-zero" bitmask,
// and only the set bits are visited.
// NOTE(review): the signed saturating packs treat a count >= 2^31 as
// negative, hence as "zero" in the mask; this presumably cannot happen given
// the picture-size bound on histogram values -- confirm.
//   X, Y: histograms of 256 counts each.
static uint64_t CombinedShannonEntropy_AVX2(const uint32_t X[256],
                                            const uint32_t Y[256]) {
  int i;
  uint64_t retval = 0;
  uint32_t sumX = 0, sumXY = 0;
  const __m256i zero = _mm256_setzero_si256();
  // The packs below operate per 128-bit lane, so the 32-bit groups of four
  // packed bytes come out holding entries
  //   0-3, 8-11, 16-19, 24-27 | 4-7, 12-15, 20-23, 28-31.
  // This permutation restores the natural order 0..31.
  const __m256i lane_order = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);

  for (i = 0; i < 256; i += 32) {
    const __m256i x0 = _mm256_loadu_si256((const __m256i*)(X + i + 0));
    const __m256i y0 = _mm256_loadu_si256((const __m256i*)(Y + i + 0));
    const __m256i x1 = _mm256_loadu_si256((const __m256i*)(X + i + 8));
    const __m256i y1 = _mm256_loadu_si256((const __m256i*)(Y + i + 8));
    const __m256i x2 = _mm256_loadu_si256((const __m256i*)(X + i + 16));
    const __m256i y2 = _mm256_loadu_si256((const __m256i*)(Y + i + 16));
    const __m256i x3 = _mm256_loadu_si256((const __m256i*)(X + i + 24));
    const __m256i y3 = _mm256_loadu_si256((const __m256i*)(Y + i + 24));
    // One saturated byte per histogram entry (lane-interleaved order).
    const __m256i x4 = _mm256_packs_epi16(_mm256_packs_epi32(x0, x1),
                                          _mm256_packs_epi32(x2, x3));
    const __m256i y4 = _mm256_packs_epi16(_mm256_packs_epi32(y0, y1),
                                          _mm256_packs_epi32(y2, y3));
    const __m256i x5 = _mm256_permutevar8x32_epi32(x4, lane_order);
    const __m256i y5 = _mm256_permutevar8x32_epi32(y4, lane_order);
    // Bit j of mx: X[i + j] is non-zero. Bit j of my: X[i + j] or Y[i + j] is.
    const uint32_t mx =
        (uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(x5, zero));
    uint32_t my =
        (uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(y5, zero)) | mx;
    while (my) {
      const int32_t j = BitsCtz(my);
      const uint32_t x = X[i + j];
      const uint32_t xy = x + Y[i + j];
      if ((mx >> j) & 1) {
        sumX += x;
        retval += VP8LFastSLog2(x);
      }
      sumXY += xy;
      retval += VP8LFastSLog2(xy);
      my &= my - 1;  // clear the lowest set bit
    }
  }
  retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
  return retval;
}
|
||||
|
||||
#else
|
||||
|
||||
#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns the number of leading elements that are identical in 'array1' and
// 'array2', looking at no more than 'length' elements.
static int VectorMismatch_AVX2(const uint32_t* const array1,
                               const uint32_t* const array2, int length) {
  int match_len = 0;

  if (length >= 24) {
    __m256i cur1 = _mm256_loadu_si256((const __m256i*)&array1[0]);
    __m256i cur2 = _mm256_loadu_si256((const __m256i*)&array2[0]);
    for (;;) {
      // Loop unrolling and early load both provide a speedup of 10% for the
      // current function. Also, max_limit can be MAX_LENGTH=4096 at most.
      const __m256i eq_cur = _mm256_cmpeq_epi32(cur1, cur2);
      const __m256i next1 =
          _mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
      const __m256i next2 =
          _mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
      __m256i eq_next;
      // A mask other than all-ones means one of these 8 elements differs.
      if ((uint32_t)_mm256_movemask_epi8(eq_cur) != 0xffffffff) break;
      match_len += 8;

      eq_next = _mm256_cmpeq_epi32(next1, next2);
      cur1 = _mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
      cur2 = _mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
      if ((uint32_t)_mm256_movemask_epi8(eq_next) != 0xffffffff) break;
      match_len += 8;

      // The next round reads up to index match_len + 23: stop before that
      // would go past 'length'.
      if (match_len + 24 >= length) break;
    }
  } else if (length >= 8 &&
             (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
                 _mm256_loadu_si256((const __m256i*)&array1[0]),
                 _mm256_loadu_si256((const __m256i*)&array2[0]))) ==
                 0xffffffff) {
    // Short input: at most two 8-element comparisons are possible.
    match_len = 8;
    if (length >= 16 &&
        (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
            _mm256_loadu_si256((const __m256i*)&array1[8]),
            _mm256_loadu_si256((const __m256i*)&array2[8]))) == 0xffffffff) {
      match_len = 16;
    }
  }

  // Finish element by element to find the exact mismatch position.
  for (; match_len < length; ++match_len) {
    if (array1[match_len] != array2[match_len]) break;
  }
  return match_len;
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
static void BundleColorMap_AVX2(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst) {
|
||||
int x = 0;
|
||||
assert(xbits >= 0);
|
||||
assert(xbits <= 3);
|
||||
switch (xbits) {
|
||||
case 0: {
|
||||
const __m256i ff = _mm256_set1_epi16((short)0xff00);
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
// Store 0xff000000 | (row[x] << 8).
|
||||
for (x = 0; x + 32 <= width; x += 32, dst += 32) {
|
||||
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
|
||||
const __m256i in_lo = _mm256_unpacklo_epi8(zero, in);
|
||||
const __m256i dst0 = _mm256_unpacklo_epi16(in_lo, ff);
|
||||
const __m256i dst1 = _mm256_unpackhi_epi16(in_lo, ff);
|
||||
const __m256i in_hi = _mm256_unpackhi_epi8(zero, in);
|
||||
const __m256i dst2 = _mm256_unpacklo_epi16(in_hi, ff);
|
||||
const __m256i dst3 = _mm256_unpackhi_epi16(in_hi, ff);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[16], (__m128i*)&dst[0], dst0);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[20], (__m128i*)&dst[4], dst1);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[24], (__m128i*)&dst[8], dst2);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[28], (__m128i*)&dst[12], dst3);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
const __m256i ff = _mm256_set1_epi16((short)0xff00);
|
||||
const __m256i mul = _mm256_set1_epi16(0x110);
|
||||
for (x = 0; x + 32 <= width; x += 32, dst += 16) {
|
||||
// 0a0b | (where a/b are 4 bits).
|
||||
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
|
||||
const __m256i tmp = _mm256_mullo_epi16(in, mul); // aba0
|
||||
const __m256i pack = _mm256_and_si256(tmp, ff); // ab00
|
||||
const __m256i dst0 = _mm256_unpacklo_epi16(pack, ff);
|
||||
const __m256i dst1 = _mm256_unpackhi_epi16(pack, ff);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[8], (__m128i*)&dst[0], dst0);
|
||||
_mm256_storeu2_m128i((__m128i*)&dst[12], (__m128i*)&dst[4], dst1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
const __m256i mask_or = _mm256_set1_epi32((int)0xff000000);
|
||||
const __m256i mul_cst = _mm256_set1_epi16(0x0104);
|
||||
const __m256i mask_mul = _mm256_set1_epi16(0x0f00);
|
||||
for (x = 0; x + 32 <= width; x += 32, dst += 8) {
|
||||
// 000a000b000c000d | (where a/b/c/d are 2 bits).
|
||||
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
|
||||
const __m256i mul =
|
||||
_mm256_mullo_epi16(in, mul_cst); // 00ab00b000cd00d0
|
||||
const __m256i tmp =
|
||||
_mm256_and_si256(mul, mask_mul); // 00ab000000cd0000
|
||||
const __m256i shift = _mm256_srli_epi32(tmp, 12); // 00000000ab000000
|
||||
const __m256i pack = _mm256_or_si256(shift, tmp); // 00000000abcd0000
|
||||
// Convert to 0xff00**00.
|
||||
const __m256i res = _mm256_or_si256(pack, mask_or);
|
||||
_mm256_storeu_si256((__m256i*)dst, res);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
assert(xbits == 3);
|
||||
for (x = 0; x + 32 <= width; x += 32, dst += 4) {
|
||||
// 0000000a00000000b... | (where a/b are 1 bit).
|
||||
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
|
||||
const __m256i shift = _mm256_slli_epi64(in, 7);
|
||||
const uint32_t move = _mm256_movemask_epi8(shift);
|
||||
dst[0] = 0xff000000 | ((move & 0xff) << 8);
|
||||
dst[1] = 0xff000000 | (move & 0xff00);
|
||||
dst[2] = 0xff000000 | ((move & 0xff0000) >> 8);
|
||||
dst[3] = 0xff000000 | ((move & 0xff000000) >> 16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (x != width) {
|
||||
VP8LBundleColorMap_SSE(row + x, width - x, xbits, dst);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Batch version of Predictor Transform subtraction
|
||||
|
||||
// Stores in '*avg' the per-byte truncated average (a0 + a1) >> 1.
static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
                                       const __m256i* const a1,
                                       __m256i* const avg) {
  // _mm256_avg_epu8 rounds up, i.e. computes (a + b + 1) >> 1. Removing the
  // low bit of (a ^ b) turns that into the truncated value:
  // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
  const __m256i low_bit = _mm256_set1_epi8(1);
  const __m256i differing = _mm256_xor_si256(*a0, *a1);
  const __m256i round_fix = _mm256_and_si256(differing, low_bit);
  const __m256i rounded_up = _mm256_avg_epu8(*a0, *a1);
  *avg = _mm256_sub_epi8(rounded_up, round_fix);
}
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorSub0_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m256i black = _mm256_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
const __m256i res = _mm256_sub_epi8(src, black);
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_SSE[0](in + i, NULL, num_pixels - i, out + i);
|
||||
}
|
||||
(void)upper;
|
||||
}
|
||||
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorSub##X##_AVX2( \
|
||||
const uint32_t* const in, const uint32_t* const upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT const out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) { \
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
|
||||
const __m256i pred = _mm256_loadu_si256((const __m256i*)&(IN)); \
|
||||
const __m256i res = _mm256_sub_epi8(src, pred); \
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsSub_SSE[(X)](in + i, WEBP_OFFSET_PTR(upper, i), \
|
||||
num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L
|
||||
GENERATE_PREDICTOR_1(2, upper[i]) // Predictor2: T
|
||||
GENERATE_PREDICTOR_1(3, upper[i + 1]) // Predictor3: TR
|
||||
GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
|
||||
#undef GENERATE_PREDICTOR_1
|
||||
|
||||
// Predictor5: avg2(avg2(L, TR), T)
|
||||
static void PredictorSub5_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
__m256i avg, pred, res;
|
||||
Average2_m256i(&L, &TR, &avg);
|
||||
Average2_m256i(&avg, &T, &pred);
|
||||
res = _mm256_sub_epi8(src, pred);
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_SSE[5](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, A, B) \
|
||||
static void PredictorSub##X##_AVX2(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) { \
|
||||
const __m256i tA = _mm256_loadu_si256((const __m256i*)&(A)); \
|
||||
const __m256i tB = _mm256_loadu_si256((const __m256i*)&(B)); \
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
|
||||
__m256i pred, res; \
|
||||
Average2_m256i(&tA, &tB, &pred); \
|
||||
res = _mm256_sub_epi8(src, pred); \
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsSub_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_2(6, in[i - 1], upper[i - 1]) // Predictor6: avg(L, TL)
|
||||
GENERATE_PREDICTOR_2(7, in[i - 1], upper[i]) // Predictor7: avg(L, T)
|
||||
GENERATE_PREDICTOR_2(8, upper[i - 1], upper[i]) // Predictor8: avg(TL, T)
|
||||
GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
|
||||
#undef GENERATE_PREDICTOR_2
|
||||
|
||||
// Predictor10: avg(avg(L,TL), avg(T, TR)).
|
||||
static void PredictorSub10_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
|
||||
__m256i avgTTR, avgLTL, avg, res;
|
||||
Average2_m256i(&T, &TR, &avgTTR);
|
||||
Average2_m256i(&L, &TL, &avgLTL);
|
||||
Average2_m256i(&avgTTR, &avgLTL, &avg);
|
||||
res = _mm256_sub_epi8(src, avg);
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_SSE[10](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor11: select.
// For each of the 8 pixels, stores in the matching 32-bit slot of '*out' the
// sum over the four bytes of |A - B|.
static void GetSumAbsDiff32_AVX2(const __m256i* const A, const __m256i* const B,
                                 __m256i* const out) {
  // _mm256_sad_epu8 sums over 8 bytes, so each pixel is widened to 64 bits.
  // We can unpack with any value on the upper 32 bits, provided it's the same
  // on both operands (so that their sum of abs diff is zero). Here we use *A.
  const __m256i a_even = _mm256_unpacklo_epi32(*A, *A);
  const __m256i a_odd = _mm256_unpackhi_epi32(*A, *A);
  const __m256i b_even = _mm256_unpacklo_epi32(*B, *A);
  const __m256i b_odd = _mm256_unpackhi_epi32(*B, *A);
  const __m256i sad_even = _mm256_sad_epu8(a_even, b_even);
  const __m256i sad_odd = _mm256_sad_epu8(a_odd, b_odd);
  // Packing drops the zero upper halves and restores one value per pixel.
  *out = _mm256_packs_epi32(sad_even, sad_odd);
}
|
||||
|
||||
// Predictor11 (select): the prediction is L when sum|L - TL| > sum|T - TL|
// and T otherwise. Writes in[] minus that prediction (per byte, wrapping) to
// out[].
static void PredictorSub11_AVX2(const uint32_t* in, const uint32_t* upper,
                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
  int i = 0;
  while (i + 8 <= num_pixels) {
    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
    const __m256i left = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
    const __m256i top = _mm256_loadu_si256((const __m256i*)&upper[i]);
    const __m256i top_left =
        _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
    __m256i dist_top, dist_left;
    GetSumAbsDiff32_AVX2(&top, &top_left, &dist_top);    // sum |T-TL|
    GetSumAbsDiff32_AVX2(&left, &top_left, &dist_left);  // sum |L-TL|
    {
      // All-ones in the pixels where L must be picked, zero where T is.
      const __m256i pick_left = _mm256_cmpgt_epi32(dist_left, dist_top);
      const __m256i from_top = _mm256_andnot_si256(pick_left, top);
      const __m256i from_left = _mm256_and_si256(pick_left, left);
      const __m256i pred = _mm256_or_si256(from_left, from_top);
      _mm256_storeu_si256((__m256i*)&out[i], _mm256_sub_epi8(src, pred));
    }
    i += 8;
  }
  // Remaining pixels, if any, go to the VP8LPredictorsSub_SSE entry.
  if (i != num_pixels) {
    VP8LPredictorsSub_SSE[11](in + i, upper + i, num_pixels - i, out + i);
  }
}
|
||||
|
||||
// Predictor12: ClampedSubSubtractFull.
|
||||
static void PredictorSub12_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
|
||||
const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);
|
||||
const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
|
||||
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
|
||||
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
|
||||
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
|
||||
const __m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);
|
||||
const __m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
|
||||
const __m256i pred_lo = _mm256_add_epi16(L_lo, diff_lo);
|
||||
const __m256i pred_hi = _mm256_add_epi16(L_hi, diff_hi);
|
||||
const __m256i pred = _mm256_packus_epi16(pred_lo, pred_hi);
|
||||
const __m256i res = _mm256_sub_epi8(src, pred);
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_SSE[12](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictors13: ClampedAddSubtractHalf
|
||||
static void PredictorSub13_AVX2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
for (i = 0; i + 8 <= num_pixels; i += 8) {
|
||||
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
|
||||
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
|
||||
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
|
||||
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
|
||||
// lo.
|
||||
const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);
|
||||
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
|
||||
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
|
||||
const __m256i sum_lo = _mm256_add_epi16(T_lo, L_lo);
|
||||
const __m256i avg_lo = _mm256_srli_epi16(sum_lo, 1);
|
||||
const __m256i A1_lo = _mm256_sub_epi16(avg_lo, TL_lo);
|
||||
const __m256i bit_fix_lo = _mm256_cmpgt_epi16(TL_lo, avg_lo);
|
||||
const __m256i A2_lo = _mm256_sub_epi16(A1_lo, bit_fix_lo);
|
||||
const __m256i A3_lo = _mm256_srai_epi16(A2_lo, 1);
|
||||
const __m256i A4_lo = _mm256_add_epi16(avg_lo, A3_lo);
|
||||
// hi.
|
||||
const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
|
||||
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
|
||||
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
|
||||
const __m256i sum_hi = _mm256_add_epi16(T_hi, L_hi);
|
||||
const __m256i avg_hi = _mm256_srli_epi16(sum_hi, 1);
|
||||
const __m256i A1_hi = _mm256_sub_epi16(avg_hi, TL_hi);
|
||||
const __m256i bit_fix_hi = _mm256_cmpgt_epi16(TL_hi, avg_hi);
|
||||
const __m256i A2_hi = _mm256_sub_epi16(A1_hi, bit_fix_hi);
|
||||
const __m256i A3_hi = _mm256_srai_epi16(A2_hi, 1);
|
||||
const __m256i A4_hi = _mm256_add_epi16(avg_hi, A3_hi);
|
||||
|
||||
const __m256i pred = _mm256_packus_epi16(A4_lo, A4_hi);
|
||||
const __m256i res = _mm256_sub_epi8(src, pred);
|
||||
_mm256_storeu_si256((__m256i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_SSE[13](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8LEncDspInitAVX2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitAVX2(void) {
|
||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_AVX2;
|
||||
VP8LTransformColor = TransformColor_AVX2;
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_AVX2;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_AVX2;
|
||||
VP8LAddVector = AddVector_AVX2;
|
||||
VP8LAddVectorEq = AddVectorEq_AVX2;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_AVX2;
|
||||
VP8LVectorMismatch = VectorMismatch_AVX2;
|
||||
VP8LBundleColorMap = BundleColorMap_AVX2;
|
||||
|
||||
VP8LPredictorsSub[0] = PredictorSub0_AVX2;
|
||||
VP8LPredictorsSub[1] = PredictorSub1_AVX2;
|
||||
VP8LPredictorsSub[2] = PredictorSub2_AVX2;
|
||||
VP8LPredictorsSub[3] = PredictorSub3_AVX2;
|
||||
VP8LPredictorsSub[4] = PredictorSub4_AVX2;
|
||||
VP8LPredictorsSub[5] = PredictorSub5_AVX2;
|
||||
VP8LPredictorsSub[6] = PredictorSub6_AVX2;
|
||||
VP8LPredictorsSub[7] = PredictorSub7_AVX2;
|
||||
VP8LPredictorsSub[8] = PredictorSub8_AVX2;
|
||||
VP8LPredictorsSub[9] = PredictorSub9_AVX2;
|
||||
VP8LPredictorsSub[10] = PredictorSub10_AVX2;
|
||||
VP8LPredictorsSub[11] = PredictorSub11_AVX2;
|
||||
VP8LPredictorsSub[12] = PredictorSub12_AVX2;
|
||||
VP8LPredictorsSub[13] = PredictorSub13_AVX2;
|
||||
VP8LPredictorsSub[14] = PredictorSub0_AVX2; // <- padding security sentinels
|
||||
VP8LPredictorsSub[15] = PredictorSub0_AVX2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_AVX2
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8LEncDspInitAVX2)
|
||||
|
||||
#endif // WEBP_USE_AVX2
|
@ -23,12 +23,12 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static float FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
uint32_t log_cnt, y, correction;
|
||||
uint32_t log_cnt, y;
|
||||
uint64_t correction;
|
||||
const int c24 = 24;
|
||||
const float v_f = (float)v;
|
||||
uint32_t temp;
|
||||
|
||||
// Xf = 256 = 2^8
|
||||
@ -49,22 +49,23 @@ static float FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||
// LOG_2_RECIPROCAL ~ 23/16
|
||||
|
||||
// (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
|
||||
correction = (23 * (v & (y - 1))) >> 4;
|
||||
return v_f * (kLog2Table[temp] + log_cnt) + correction;
|
||||
correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
|
||||
return (uint64_t)v * (kLog2Table[temp] +
|
||||
((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +
|
||||
correction;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||
return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
static uint32_t FastLog2Slow_MIPS32(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
uint32_t log_cnt, y;
|
||||
const int c24 = 24;
|
||||
double log_2;
|
||||
uint32_t log_2;
|
||||
uint32_t temp;
|
||||
|
||||
__asm__ volatile(
|
||||
@ -78,17 +79,16 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
: [c24]"r"(c24), [v]"r"(v)
|
||||
);
|
||||
|
||||
log_2 = kLog2Table[temp] + log_cnt;
|
||||
log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS);
|
||||
if (v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
// for large values of 'v'.
|
||||
|
||||
const uint32_t correction = (23 * (v & (y - 1))) >> 4;
|
||||
log_2 += (double)correction / v;
|
||||
const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
|
||||
log_2 += (uint32_t)DivRound(correction, v);
|
||||
}
|
||||
return (float)log_2;
|
||||
return log_2;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||
return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -133,59 +133,6 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
return ((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
// int i = 0;
|
||||
// int64_t cost = 0;
|
||||
// const uint32_t* pX = &X[4];
|
||||
// const uint32_t* pY = &Y[4];
|
||||
// const uint32_t* LoopEnd = &X[length];
|
||||
// while (pX != LoopEnd) {
|
||||
// const uint32_t xy0 = *pX + *pY;
|
||||
// const uint32_t xy1 = *(pX + 1) + *(pY + 1);
|
||||
// ++i;
|
||||
// cost += i * xy0;
|
||||
// cost += i * xy1;
|
||||
// pX += 2;
|
||||
// pY += 2;
|
||||
// }
|
||||
// return cost;
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
const uint32_t* const LoopEnd = &X[length];
|
||||
|
||||
__asm__ volatile(
|
||||
"mult $zero, $zero \n\t"
|
||||
"xor %[i], %[i], %[i] \n\t"
|
||||
"beq %[pX], %[LoopEnd], 2f \n\t"
|
||||
"1: \n\t"
|
||||
"lw %[temp0], 0(%[pX]) \n\t"
|
||||
"lw %[temp1], 0(%[pY]) \n\t"
|
||||
"lw %[temp2], 4(%[pX]) \n\t"
|
||||
"lw %[temp3], 4(%[pY]) \n\t"
|
||||
"addiu %[i], %[i], 1 \n\t"
|
||||
"addu %[temp0], %[temp0], %[temp1] \n\t"
|
||||
"addu %[temp2], %[temp2], %[temp3] \n\t"
|
||||
"addiu %[pX], %[pX], 8 \n\t"
|
||||
"addiu %[pY], %[pY], 8 \n\t"
|
||||
"madd %[i], %[temp0] \n\t"
|
||||
"madd %[i], %[temp2] \n\t"
|
||||
"bne %[pX], %[LoopEnd], 1b \n\t"
|
||||
"2: \n\t"
|
||||
"mfhi %[temp0] \n\t"
|
||||
"mflo %[temp1] \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
||||
[i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
|
||||
: [LoopEnd]"r"(LoopEnd)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return ((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
#define HUFFMAN_COST_PASS \
|
||||
__asm__ volatile( \
|
||||
"sll %[temp1], %[temp0], 3 \n\t" \
|
||||
@ -215,8 +162,10 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
|
||||
// Returns the various RLE counts
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int* const pstreaks = &stats->streaks[0][0];
|
||||
int* const pcnts = &stats->counts[0];
|
||||
int temp0, temp1, temp2, temp3;
|
||||
@ -227,7 +176,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
bit_entropy->sum += (*val_prev) * streak;
|
||||
bit_entropy->nonzeros += streak;
|
||||
bit_entropy->nonzero_code = *i_prev;
|
||||
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
|
||||
bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
|
||||
if (bit_entropy->max_val < *val_prev) {
|
||||
bit_entropy->max_val = *val_prev;
|
||||
}
|
||||
@ -241,9 +190,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -259,14 +209,13 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -282,7 +231,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);
|
||||
|
||||
entropy->entropy += VP8LFastSLog2(entropy->sum);
|
||||
entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
|
||||
}
|
||||
|
||||
#define ASM_START \
|
||||
@ -344,8 +293,9 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
ASM_END_COMMON_0 \
|
||||
ASM_END_COMMON_1
|
||||
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
const uint32_t* WEBP_RESTRICT pb,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
@ -356,7 +306,8 @@ static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
@ -383,7 +334,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
|
||||
VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
|
||||
VP8LFastLog2Slow = FastLog2Slow_MIPS32;
|
||||
VP8LExtraCost = ExtraCost_MIPS32;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
|
||||
VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
|
||||
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
|
||||
VP8LAddVector = AddVector_MIPS32;
|
||||
|
@ -78,8 +78,9 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
|
||||
return (uint32_t)((int)(color_pred) * color) >> 5;
|
||||
}
|
||||
|
||||
static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels) {
|
||||
static void TransformColor_MIPSdspR2(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data,
|
||||
int num_pixels) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
uint32_t argb, argb1, new_red, new_red1;
|
||||
const uint32_t G_to_R = m->green_to_red_;
|
||||
@ -171,13 +172,10 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
|
||||
int stride,
|
||||
int tile_width,
|
||||
int tile_height,
|
||||
int green_to_blue,
|
||||
int red_to_blue,
|
||||
int histo[]) {
|
||||
static void CollectColorBlueTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]) {
|
||||
const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
|
||||
const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
|
||||
const uint32_t mask = 0xff00ffu;
|
||||
@ -225,12 +223,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
return (new_red & 0xff);
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
|
||||
int stride,
|
||||
int tile_width,
|
||||
int tile_height,
|
||||
int green_to_red,
|
||||
int histo[]) {
|
||||
static void CollectColorRedTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height, int green_to_red, uint32_t histo[]) {
|
||||
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
|
@ -48,8 +48,8 @@
|
||||
dst = VSHF_UB(src, t0, mask1); \
|
||||
} while (0)
|
||||
|
||||
static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
v16u8 src0, dst0;
|
||||
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
|
||||
(m->green_to_red_ << 16));
|
||||
|
@ -72,8 +72,9 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColor_NEON(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
|
||||
const int16_t rb[8] = {
|
||||
|
@ -14,11 +14,16 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// For sign-extended multiplying constants, pre-shifted by 5:
|
||||
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
|
||||
@ -49,8 +54,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||
CST_5b(m->green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||
@ -79,10 +85,11 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 8
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0);
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||
@ -126,9 +133,10 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||
const __m128i mask = _mm_set1_epi32(0xff);
|
||||
@ -172,75 +180,113 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] += a[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&out[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
#undef LINE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
||||
// TODO(https://crbug.com/webp/499): this function produces different results
|
||||
// from the C code due to use of double/float resulting in output differences
|
||||
// when compared to -noasm.
|
||||
#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
|
||||
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
static uint64_t CombinedShannonEntropy_SSE2(const uint32_t X[256],
|
||||
const uint32_t Y[256]) {
|
||||
int i;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0, sumXY = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
for (i = 0; i < 256; i += 16) {
|
||||
@ -260,19 +306,19 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
|
||||
while (my) {
|
||||
const int32_t j = BitsCtz(my);
|
||||
int xy;
|
||||
uint32_t xy;
|
||||
if ((mx >> j) & 1) {
|
||||
const int x = X[i + j];
|
||||
sumXY += x;
|
||||
retval -= VP8LFastSLog2(x);
|
||||
retval += VP8LFastSLog2(x);
|
||||
}
|
||||
xy = X[i + j] + Y[i + j];
|
||||
sumX += xy;
|
||||
retval -= VP8LFastSLog2(xy);
|
||||
retval += VP8LFastSLog2(xy);
|
||||
my &= my - 1;
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -335,8 +381,9 @@ static int VectorMismatch_SSE2(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
assert(xbits >= 0);
|
||||
assert(xbits <= 3);
|
||||
@ -425,7 +472,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -442,7 +489,8 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* const in, \
|
||||
const uint32_t* const upper, \
|
||||
int num_pixels, uint32_t* const out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT const out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -464,7 +512,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
|
||||
|
||||
// Predictor5: avg2(avg2(L, TR), T)
|
||||
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -484,7 +532,8 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, A, B) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \
|
||||
@ -508,7 +557,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
|
||||
|
||||
// Predictor10: avg(avg(L,TL), avg(T, TR)).
|
||||
static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -543,7 +592,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
|
||||
}
|
||||
|
||||
static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -569,7 +618,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor12: ClampedSubSubtractFull.
|
||||
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -598,28 +647,46 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictors13: ClampedAddSubtractHalf
|
||||
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 2 <= num_pixels; i += 2) {
|
||||
// we can only process two pixels at a time
|
||||
const __m128i L = _mm_loadl_epi64((const __m128i*)&in[i - 1]);
|
||||
const __m128i src = _mm_loadl_epi64((const __m128i*)&in[i]);
|
||||
const __m128i T = _mm_loadl_epi64((const __m128i*)&upper[i]);
|
||||
const __m128i TL = _mm_loadl_epi64((const __m128i*)&upper[i - 1]);
|
||||
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
|
||||
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
|
||||
const __m128i sum = _mm_add_epi16(T_lo, L_lo);
|
||||
const __m128i avg = _mm_srli_epi16(sum, 1);
|
||||
const __m128i A1 = _mm_sub_epi16(avg, TL_lo);
|
||||
const __m128i bit_fix = _mm_cmpgt_epi16(TL_lo, avg);
|
||||
const __m128i A2 = _mm_sub_epi16(A1, bit_fix);
|
||||
const __m128i A3 = _mm_srai_epi16(A2, 1);
|
||||
const __m128i A4 = _mm_add_epi16(avg, A3);
|
||||
const __m128i pred = _mm_packus_epi16(A4, A4);
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storel_epi64((__m128i*)&out[i], res);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
|
||||
const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
|
||||
__m128i A4_lo, A4_hi;
|
||||
// lo.
|
||||
{
|
||||
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
|
||||
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
|
||||
const __m128i sum_lo = _mm_add_epi16(T_lo, L_lo);
|
||||
const __m128i avg_lo = _mm_srli_epi16(sum_lo, 1);
|
||||
const __m128i A1_lo = _mm_sub_epi16(avg_lo, TL_lo);
|
||||
const __m128i bit_fix_lo = _mm_cmpgt_epi16(TL_lo, avg_lo);
|
||||
const __m128i A2_lo = _mm_sub_epi16(A1_lo, bit_fix_lo);
|
||||
const __m128i A3_lo = _mm_srai_epi16(A2_lo, 1);
|
||||
A4_lo = _mm_add_epi16(avg_lo, A3_lo);
|
||||
}
|
||||
// hi.
|
||||
{
|
||||
const __m128i L_hi = _mm_unpackhi_epi8(L, zero);
|
||||
const __m128i T_hi = _mm_unpackhi_epi8(T, zero);
|
||||
const __m128i TL_hi = _mm_unpackhi_epi8(TL, zero);
|
||||
const __m128i sum_hi = _mm_add_epi16(T_hi, L_hi);
|
||||
const __m128i avg_hi = _mm_srli_epi16(sum_hi, 1);
|
||||
const __m128i A1_hi = _mm_sub_epi16(avg_hi, TL_hi);
|
||||
const __m128i bit_fix_hi = _mm_cmpgt_epi16(TL_hi, avg_hi);
|
||||
const __m128i A2_hi = _mm_sub_epi16(A1_hi, bit_fix_hi);
|
||||
const __m128i A3_hi = _mm_srai_epi16(A2_hi, 1);
|
||||
A4_hi = _mm_add_epi16(avg_hi, A3_hi);
|
||||
}
|
||||
{
|
||||
const __m128i pred = _mm_packus_epi16(A4_lo, A4_hi);
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[13](in + i, upper + i, num_pixels - i, out + i);
|
||||
@ -660,6 +727,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
|
||||
VP8LPredictorsSub[13] = PredictorSub13_SSE2;
|
||||
VP8LPredictorsSub[14] = PredictorSub0_SSE2; // <- padding security sentinels
|
||||
VP8LPredictorsSub[15] = PredictorSub0_SSE2;
|
||||
|
||||
// SSE exports for AVX and above.
|
||||
VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE2;
|
||||
VP8LTransformColor_SSE = TransformColor_SSE2;
|
||||
VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE2;
|
||||
VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE2;
|
||||
VP8LBundleColorMap_SSE = BundleColorMap_SSE2;
|
||||
|
||||
memcpy(VP8LPredictorsSub_SSE, VP8LPredictorsSub, sizeof(VP8LPredictorsSub));
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -14,9 +14,13 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include <assert.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Cost operations.
|
||||
@ -44,28 +48,6 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
|
||||
return HorizontalSum_SSE41(cost);
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
|
||||
const uint32_t* const b, int length) {
|
||||
int i;
|
||||
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
|
||||
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
|
||||
assert(length % 8 == 0);
|
||||
|
||||
for (i = 8; i + 8 <= length; i += 8) {
|
||||
const int j = (i - 2) >> 1;
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
|
||||
const __m128i a2 = _mm_hadd_epi32(a0, a1);
|
||||
const __m128i b2 = _mm_hadd_epi32(b0, b1);
|
||||
const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w);
|
||||
cost = _mm_add_epi32(mul, cost);
|
||||
}
|
||||
return HorizontalSum_SSE41(cost);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
|
||||
@ -95,10 +77,11 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
const __m128i mult =
|
||||
MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
|
||||
const __m128i perm =
|
||||
@ -141,10 +124,11 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
|
||||
int green_to_red,
|
||||
uint32_t histo[]) {
|
||||
const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
|
||||
if (tile_width >= 4) {
|
||||
@ -192,10 +176,14 @@ extern void VP8LEncDspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
|
||||
VP8LExtraCost = ExtraCost_SSE41;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_SSE41;
|
||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
|
||||
|
||||
// SSE exports for AVX and above.
|
||||
VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE41;
|
||||
VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE41;
|
||||
VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
@ -26,8 +26,8 @@
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
|
||||
// gcc-4.8.x at least.
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
|
||||
static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
|
||||
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~1);
|
||||
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
|
||||
for (; src < end; src += 2) {
|
||||
@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
|
||||
{ 21, 22, 24, 25, 26, 28, 29, 30 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
|
||||
@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
|
||||
{ 21, 20, 26, 25, 24, 30, 29, 28 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
|
||||
@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const uint8x16_t zero = LOADQ_U32_AS_U8(0);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* out) { \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor6: average(left, TL)
|
||||
static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor7: average(left, T)
|
||||
static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* out) { \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1]));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
|
@ -15,10 +15,14 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include <emmintrin.h>
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictor Transform
|
||||
@ -186,7 +190,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -202,7 +206,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i prev = _mm_set1_epi32((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -230,7 +234,8 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
// per 8 bit channel.
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -259,7 +264,8 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2)
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \
|
||||
@ -297,7 +303,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -344,7 +350,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -395,7 +401,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -490,8 +496,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
||||
@ -526,8 +532,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
@ -554,8 +560,9 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -590,8 +597,9 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
@ -631,8 +639,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
|
||||
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -703,6 +711,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
|
||||
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_SSE2;
|
||||
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_SSE2;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE2;
|
||||
|
||||
// SSE exports for AVX and above.
|
||||
memcpy(VP8LPredictorsAdd_SSE, VP8LPredictorsAdd, sizeof(VP8LPredictorsAdd));
|
||||
|
||||
VP8LAddGreenToBlueAndRed_SSE = AddGreenToBlueAndRed_SSE2;
|
||||
VP8LTransformColorInverse_SSE = TransformColorInverse_SSE2;
|
||||
|
||||
VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE2;
|
||||
VP8LConvertBGRAToRGBA_SSE = ConvertBGRAToRGBA_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -13,9 +13,10 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include "src/dsp/common_sse41.h"
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
@ -77,8 +78,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
|
||||
@ -95,8 +96,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
|
||||
@ -124,6 +125,10 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {
|
||||
VP8LTransformColorInverse = TransformColorInverse_SSE41;
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
|
||||
|
||||
// SSE exports for AVX and above.
|
||||
VP8LTransformColorInverse_SSE = TransformColorInverse_SSE41;
|
||||
VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user