mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-14 21:09:55 +02:00
Compare commits
60 Commits
615e58744f
...
v1.5.0
Author | SHA1 | Date | |
---|---|---|---|
a4d7a71533 | |||
c3d85ce4cf | |||
ad14e811cf | |||
74cd026edb | |||
a027aa93de | |||
25e17c686f | |||
aa2684fccc | |||
369238461b | |||
ceea8ff6b3 | |||
e4f7a9f0c7 | |||
1b4c967fbb | |||
9e5ecfaf00 | |||
da0d9c7d4e | |||
fcff86c71b | |||
b76c4a8416 | |||
306335198d | |||
4c85d860ea | |||
0ab789e067 | |||
0323645066 | |||
61e2cfdadd | |||
7bda3deb89 | |||
2ddaaf0aa5 | |||
a3ba6f19e9 | |||
f999d94f4a | |||
dfdcb7f95c | |||
78ed683978 | |||
d516a68e54 | |||
874069042e | |||
fdb229ea3a | |||
0c3cd9cc2c | |||
169dfbf931 | |||
2dd5eb9862 | |||
23bbafbeb8 | |||
35915b389e | |||
a32b436bd5 | |||
04d4b4f387 | |||
b1cb37e659 | |||
201894ef24 | |||
02eac8a741 | |||
84b118c9c3 | |||
052cf42f1a | |||
220ee52967 | |||
7861947813 | |||
14f09ab75b | |||
a78c5356ba | |||
bc49176355 | |||
34f9223829 | |||
367ca938f1 | |||
a582b53b74 | |||
0fd25d8406 | |||
f888291359 | |||
40e4ca60ea | |||
57883c78ed | |||
1c8eba978b | |||
2e81017c7a | |||
94de6c7fed | |||
51d9832a36 | |||
7bcb36b884 | |||
8e0cc14c3e | |||
cea684626d |
3
AUTHORS
3
AUTHORS
@ -11,11 +11,13 @@ Contributors:
|
||||
- Christopher Degawa (ccom at randomderp dot com)
|
||||
- Clement Courbet (courbet at google dot com)
|
||||
- Djordje Pesut (djordje dot pesut at imgtec dot com)
|
||||
- Frank (1433351828 at qq dot com)
|
||||
- Frank Barchard (fbarchard at google dot com)
|
||||
- Hui Su (huisu at google dot com)
|
||||
- H. Vetinari (h dot vetinari at gmx dot com)
|
||||
- Ilya Kurdyukov (jpegqs at gmail dot com)
|
||||
- Ingvar Stepanyan (rreverser at google dot com)
|
||||
- Istvan Stefan (Istvan dot Stefan at arm dot com)
|
||||
- James Zern (jzern at google dot com)
|
||||
- Jan Engelhardt (jengelh at medozas dot de)
|
||||
- Jehan (jehan at girinstud dot io)
|
||||
@ -62,6 +64,7 @@ Contributors:
|
||||
- Vincent Rabaud (vrabaud at google dot com)
|
||||
- Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
|
||||
- Wan-Teh Chang (wtc at google dot com)
|
||||
- wrv (wrv at utexas dot edu)
|
||||
- Yang Zhang (yang dot zhang at arm dot com)
|
||||
- Yannis Guyon (yguyon at google dot com)
|
||||
- Zhi An Ng (zhin at chromium dot org)
|
||||
|
@ -567,7 +567,8 @@ if(WEBP_BUILD_GIF2WEBP)
|
||||
add_executable(gif2webp ${GIF2WEBP_SRCS})
|
||||
target_link_libraries(gif2webp exampleutil imageioutil webp libwebpmux
|
||||
${WEBP_DEP_GIF_LIBRARIES})
|
||||
target_include_directories(gif2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src)
|
||||
target_include_directories(gif2webp PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
install(TARGETS gif2webp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()
|
||||
|
||||
|
138
ChangeLog
138
ChangeLog
@ -1,3 +1,141 @@
|
||||
c3d85ce4 update NEWS
|
||||
ad14e811 tests/fuzzer/*: add missing <string_view> include
|
||||
74cd026e fuzz_utils.cc: fix build error w/WEBP_REDUCE_SIZE
|
||||
a027aa93 mux_demux_api_fuzzer.cc: fix -Wshadow warning
|
||||
25e17c68 update ChangeLog (tag: v1.5.0-rc1)
|
||||
aa2684fc update NEWS
|
||||
36923846 bump version to 1.5.0
|
||||
ceea8ff6 update AUTHORS
|
||||
e4f7a9f0 img2webp: add a warning for unused options
|
||||
1b4c967f Merge "Properly check the data size against the end of the RIFF chunk" into main
|
||||
9e5ecfaf Properly check the data size against the end of the RIFF chunk
|
||||
da0d9c7d examples: exit w/failure w/no args
|
||||
fcff86c7 {gif,img}2webp: sync -m help w/cwebp
|
||||
b76c4a84 man/img2webp.1: sync -m text w/cwebp.1 & gif2webp.1
|
||||
30633519 muxread: fix reading of buffers > riff size
|
||||
4c85d860 yuv.h: update RGB<->YUV coefficients in comment
|
||||
0ab789e0 Merge changes I6dfedfd5,I2376e2dc into main
|
||||
03236450 {ios,xcframework}build.sh: fix compilation w/Xcode 16
|
||||
61e2cfda rework AddVectorEq_SSE2
|
||||
7bda3deb rework AddVector_SSE2
|
||||
2ddaaf0a Fix variable names in SharpYuvComputeConversionMatrix
|
||||
a3ba6f19 Makefile.vc: fix gif2webp link error
|
||||
f999d94f gif2webp: add -sharp_yuv/-near_lossless
|
||||
dfdcb7f9 Merge "lossless.h: fix function declaration mismatches" into main (tag: webp-rfc9649)
|
||||
78ed6839 fix overread in Intra4Preds_NEON
|
||||
d516a68e lossless.h: fix function declaration mismatches
|
||||
87406904 Merge "Improve documentation of SharpYuvConversionMatrix." into main
|
||||
fdb229ea Merge changes I07a7e36a,Ib29980f7,I2316122d,I2356e314,I32b53dd3, ... into main
|
||||
0c3cd9cc Improve documentation of SharpYuvConversionMatrix.
|
||||
169dfbf9 disable Intra4Preds_NEON
|
||||
2dd5eb98 dsp/yuv*: use WEBP_RESTRICT qualifier
|
||||
23bbafbe dsp/upsampling*: use WEBP_RESTRICT qualifier
|
||||
35915b38 dsp/rescaler*: use WEBP_RESTRICT qualifier
|
||||
a32b436b dsp/lossless*: use WEBP_RESTRICT qualifier
|
||||
04d4b4f3 dsp/filters*: use WEBP_RESTRICT qualifier
|
||||
b1cb37e6 dsp/enc*: use WEBP_RESTRICT qualifier
|
||||
201894ef dsp/dec*: use WEBP_RESTRICT qualifier
|
||||
02eac8a7 dsp/cost*: use WEBP_RESTRICT qualifier
|
||||
84b118c9 Merge "webp-container-spec: normalize notes & unknown chunk link" into main
|
||||
052cf42f webp-container-spec: normalize notes & unknown chunk link
|
||||
220ee529 Search for best predictor transform bits
|
||||
78619478 Try to reduce the sampling for the entropy image
|
||||
14f09ab7 webp-container-spec: reorder chunk size - N text
|
||||
a78c5356 Remove a useless malloc for entropy image
|
||||
bc491763 Merge "Refactor predictor finding" into main
|
||||
34f92238 man/{cwebp,img2webp}.1: rm 'if needed' from -sharp_yuv
|
||||
367ca938 Refactor predictor finding
|
||||
a582b53b webp-lossless-bitstream-spec: clarify some text
|
||||
0fd25d84 Merge "anim_encode.c: fix function ref in comment" into main
|
||||
f8882913 anim_encode.c: fix function ref in comment
|
||||
40e4ca60 specs_generation.md: update kramdown command line
|
||||
57883c78 img2webp: add -exact/-noexact per-frame options
|
||||
1c8eba97 img2webp,cosmetics: add missing '.' spacers to help
|
||||
2e81017c Convert predictor_enc.c to fixed point
|
||||
94de6c7f Merge "Fix fuzztest link errors w/-DBUILD_SHARED_LIBS=1" into main
|
||||
51d9832a Fix fuzztest link errors w/-DBUILD_SHARED_LIBS=1
|
||||
7bcb36b8 Merge "Fix static overflow warning." into main
|
||||
8e0cc14c Fix static overflow warning.
|
||||
cea68462 README.md: add security report note
|
||||
615e5874 Merge "make VP8LPredictor[01]_C() static" into main
|
||||
233e86b9 Merge changes Ie43dc5ef,I94cd8bab into main
|
||||
1a29fd2f make VP8LPredictor[01]_C() static
|
||||
dd9d3770 Do*Filter_*: remove row & num_rows parameters
|
||||
ab451a49 Do*Filter_C: remove dead 'inverse' code paths
|
||||
f9a480f7 {TrueMotion,TM16}_NEON: remove zero extension
|
||||
04834aca Merge changes I25c30a9e,I0a192fc6,I4cf89575 into main
|
||||
39a602af webp-lossless-bitstream-spec: normalize predictor transform ref
|
||||
f28c837d Merge "webp-container-spec: align anim pseudocode w/prose" into main
|
||||
74be8e22 Fix implicit conversion issues
|
||||
0c01db7c Merge "Increase the transform bits if possible." into main
|
||||
f2d6dc1e Increase the transform bits if possible.
|
||||
caa19e5b update link to issue tracker
|
||||
c9dd9bd4 webp-container-spec: align anim pseudocode w/prose
|
||||
8a7c8dc6 WASM: Enable VP8L_USE_FAST_LOAD
|
||||
f0c53cd9 WASM: don't use USE_GENERIC_TREE
|
||||
eef903d0 WASM: Enable 64-bit BITS caching
|
||||
6296cc8d iterator_enc: make VP8IteratorReset() static
|
||||
fbd93896 histogram_enc: make VP8LGetHistogramSize static
|
||||
cc7ff545 cost_enc: make VP8CalculateLevelCosts[] static
|
||||
4e2828ba vp8l_dec: make VP8LClear() static
|
||||
d742b24a Intra16Preds_NEON: fix truemotion saturation
|
||||
c7bb4cb5 Intra4Preds_NEON: fix truemotion saturation
|
||||
952a989b Merge "Remove TODO now that log is using fixed point." into main
|
||||
dde11574 Remove TODO now that log is using fixed point.
|
||||
a1ca153d Fix hidden myerr in my_error_exit
|
||||
3bd94202 Merge changes Iff6e47ed,I24c67cd5,Id781e761 into main
|
||||
d27d246e Merge "Convert VP8LFastSLog2 to fixed point" into main
|
||||
4838611f Disable msg_code use in fuzzing mode
|
||||
314a142a Use QuantizeBlock_NEON for VP8EncQuantizeBlockWHT on Arm
|
||||
3bfb05e3 Add AArch64 Neon implementation of Intra16Preds
|
||||
baa93808 Add AArch64 Neon implementation of Intra4Preds
|
||||
41a5e582 Fix errors when compiling code as C++
|
||||
fb444b69 Convert VP8LFastSLog2 to fixed point
|
||||
c1c89f51 Fix WEBP_NODISCARD comment and C++ version
|
||||
66408c2c Switch the histogram_enc.h API to fixed point
|
||||
ac1e410d Remove leftover tiff dep
|
||||
b78d3957 Disable TIFF on fuzztest.
|
||||
cff21a7d Do not build statically on oss-fuzz.
|
||||
6853a8e5 Merge "Move more internal fuzzers to public." into main
|
||||
9bc09db4 Merge "Convert VP8LFastLog2 to fixed point" into main
|
||||
0a9f1c19 Convert VP8LFastLog2 to fixed point
|
||||
db0cb9c2 Move more internal fuzzers to public.
|
||||
ff2b5b15 Merge "advanced_api_fuzzer.cc: use crop dims in OOM check" into main
|
||||
c4af79d0 Put 0 at the end of a palette and do not store it.
|
||||
0ec80aef Delete last references to delta palettization
|
||||
96d79f84 advanced_api_fuzzer.cc: use crop dims in OOM check
|
||||
c35c7e02 Fix huffman fuzzer to not leak.
|
||||
f2fe8dec Bump fuzztest dependency.
|
||||
9ce982fd Fix fuzz tests to work on oss-fuzz
|
||||
3ba8af1a Do not escape quotes anymore in build.sh
|
||||
ea0e121b Allow centipede to be used as a fuzzing engine.
|
||||
27731afd make VP8I4ModeOffsets & VP8MakeIntra4Preds static
|
||||
ddd6245e oss-fuzz/build.sh: use heredoc for script creation
|
||||
50074930 oss-fuzz/build.sh,cosmetics: fix indent
|
||||
20e92f7d Limit the possible fuzz engines.
|
||||
4f200de5 Switch public fuzz tests to fuzztest.
|
||||
64186bb3 Add huffman_fuzzer to .gitignore
|
||||
0905f61c Move build script from oss-fuzz repo to here.
|
||||
e8678758 Fix link to Javascript documentation
|
||||
5e5b8f0c Fix SSE2 Transform_AC3 function name
|
||||
45129ee0 Revert "Check all the rows."
|
||||
ee26766a Check all the rows.
|
||||
7ec51c59 Increase the transform bits if possible.
|
||||
3cd16fd3 Revert "Increase the transform bits if possible."
|
||||
971a03d8 Increase the transform bits if possible.
|
||||
1bf198a2 Allow transform_bits to be different during encoding.
|
||||
1e462ca8 Define MAX_TRANSFORM_BITS according to the specification.
|
||||
64d1ec23 Use (MIN/NUM)_(TRANSFORM/HUFFMAN)_BITS where appropriate
|
||||
a90160e1 Refactor histograms in predictors.
|
||||
a7aa7525 Fix some function declarations
|
||||
68ff4e1e Merge "jpegdec: add a hint for EOF/READ errors" into main
|
||||
79e7968a jpegdec: add a hint for EOF/READ errors
|
||||
d33455cd man/*: s/BUGS/REPORTING BUGS/
|
||||
a67ff735 normalize example exit status
|
||||
edc28909 upsampling_{neon,sse41}: fix int sanitizer warning
|
||||
3cada4ce ImgIoUtilReadFile: check ftell() return
|
||||
dc950585 Merge tag 'v1.4.0'
|
||||
845d5476 update ChangeLog (tag: v1.4.0, origin/1.4.0)
|
||||
8a6a55bb update NEWS
|
||||
cf7c5a5d provide a way to opt-out/override WEBP_NODISCARD
|
||||
cc34288a update ChangeLog (tag: v1.4.0-rc1)
|
||||
|
@ -393,7 +393,7 @@ $(DIRBIN)\dwebp.exe: $(IMAGEIO_UTIL_OBJS)
|
||||
$(DIRBIN)\dwebp.exe: $(LIBWEBPDEMUX)
|
||||
$(DIRBIN)\gif2webp.exe: $(DIROBJ)\examples\gif2webp.obj $(EX_GIF_DEC_OBJS)
|
||||
$(DIRBIN)\gif2webp.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS) $(LIBWEBPMUX)
|
||||
$(DIRBIN)\gif2webp.exe: $(LIBWEBP)
|
||||
$(DIRBIN)\gif2webp.exe: $(LIBWEBP) $(LIBSHARPYUV)
|
||||
$(DIRBIN)\vwebp.exe: $(DIROBJ)\examples\vwebp.obj $(EX_UTIL_OBJS)
|
||||
$(DIRBIN)\vwebp.exe: $(IMAGEIO_UTIL_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
|
||||
$(DIRBIN)\vwebp_sdl.exe: $(DIROBJ)\extras\vwebp_sdl.obj
|
||||
|
22
NEWS
22
NEWS
@ -1,3 +1,25 @@
|
||||
- 12/19/2024 version 1.5.0
|
||||
This is a binary compatible release.
|
||||
API changes:
|
||||
- `cross_color_transform_bits` added to WebPAuxStats
|
||||
* minor lossless encoder speed and compression improvements
|
||||
* lossless encoding does not use floats anymore
|
||||
* additional Arm optimizations for lossy & lossless + general code generation
|
||||
improvements
|
||||
* improvements to WASM performance (#643)
|
||||
* improvements and corrections in webp-container-spec.txt and
|
||||
webp-lossless-bitstream-spec.txt (#646, #355607636)
|
||||
* further security related hardening and increased fuzzing coverage w/fuzztest
|
||||
(oss-fuzz: #382816119, #70112, #70102, #69873, #69825, #69508, #69208)
|
||||
* miscellaneous warning, bug & build fixes (#499, #562, #381372617,
|
||||
#381109771, #42340561, #375011696, #372109644, chromium: #334120888)
|
||||
Tool updates:
|
||||
* gif2webp: add -sharp_yuv & -near_lossless
|
||||
* img2webp: add -exact & -noexact
|
||||
* exit codes normalized; running an example program with no
|
||||
arguments will output its help and exit with an error (#42340557,
|
||||
#381372617)
|
||||
|
||||
- 4/12/2024: version 1.4.0
|
||||
This is a binary compatible release.
|
||||
* API changes:
|
||||
|
@ -7,7 +7,7 @@
|
||||
\__\__/\____/\_____/__/ ____ ___
|
||||
/ _/ / \ \ / _ \/ _/
|
||||
/ \_/ / / \ \ __/ \__
|
||||
\____/____/\_____/_____/____/v1.4.0
|
||||
\____/____/\_____/_____/____/v1.5.0
|
||||
```
|
||||
|
||||
WebP codec is a library to encode and decode images in WebP format. This package
|
||||
@ -42,7 +42,8 @@ See the [APIs documentation](doc/api.md), and API usage examples in the
|
||||
|
||||
## Bugs
|
||||
|
||||
Please report all bugs to the issue tracker: https://issues.webmproject.org
|
||||
Please report all bugs to the [issue tracker](https://issues.webmproject.org).
|
||||
For security reports, select 'Security report' from the Template dropdown.
|
||||
|
||||
Patches welcome! See [how to contribute](CONTRIBUTING.md).
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
AC_INIT([libwebp], [1.4.0],
|
||||
AC_INIT([libwebp], [1.5.0],
|
||||
[https://issues.webmproject.org],,
|
||||
[https://developers.google.com/speed/webp])
|
||||
AC_CANONICAL_HOST
|
||||
|
@ -17,10 +17,11 @@ rubygems will install automatically. The following will apply inline CSS
|
||||
styling; an external stylesheet is not needed.
|
||||
|
||||
```shell
|
||||
$ kramdown doc/webp-lossless-bitstream-spec.txt --template \
|
||||
doc/template.html --coderay-css style --coderay-line-numbers ' ' \
|
||||
--coderay-default-lang c > \
|
||||
doc/output/webp-lossless-bitstream-spec.html
|
||||
$ kramdown doc/webp-lossless-bitstream-spec.txt \
|
||||
--template doc/template.html \
|
||||
-x syntax-coderay --syntax-highlighter coderay \
|
||||
--syntax-highlighter-opts "{default_lang: c, line_numbers: , css: style}" \
|
||||
> doc/output/webp-lossless-bitstream-spec.html
|
||||
```
|
||||
|
||||
Optimally, use kramdown 0.13.7 or newer if syntax highlighting desired.
|
||||
|
15
doc/tools.md
15
doc/tools.md
@ -321,10 +321,13 @@ Per-frame options (only used for subsequent images input):
|
||||
|
||||
```
|
||||
-d <int> ............. frame duration in ms (default: 100)
|
||||
-lossless ........... use lossless mode (default)
|
||||
-lossy ... ........... use lossy mode
|
||||
-lossless ............ use lossless mode (default)
|
||||
-lossy ............... use lossy mode
|
||||
-q <float> ........... quality
|
||||
-m <int> ............. method to use
|
||||
-m <int> ............. compression method (0=fast, 6=slowest), default=4
|
||||
-exact, -noexact ..... preserve or alter RGB values in transparent area
|
||||
(default: -noexact, may cause artifacts
|
||||
with lossy animations)
|
||||
```
|
||||
|
||||
example: `img2webp -loop 2 in0.png -lossy in1.jpg -d 80 in2.tiff -o out.webp`
|
||||
@ -351,8 +354,12 @@ Options:
|
||||
-lossy ................. encode image using lossy compression
|
||||
-mixed ................. for each frame in the image, pick lossy
|
||||
or lossless compression heuristically
|
||||
-near_lossless <int> ... use near-lossless image preprocessing
|
||||
(0..100=off), default=100
|
||||
-sharp_yuv ............. use sharper (and slower) RGB->YUV conversion
|
||||
(lossy only)
|
||||
-q <float> ............. quality factor (0:small..100:big)
|
||||
-m <int> ............... compression method (0=fast, 6=slowest)
|
||||
-m <int> ............... compression method (0=fast, 6=slowest), default=4
|
||||
-min_size .............. minimize output size (default:off)
|
||||
lossless compression by default; can be
|
||||
combined with -q, -m, -lossy or -mixed
|
||||
|
@ -131,7 +131,7 @@ Chunk Payload: _Chunk Size_ bytes
|
||||
: The data payload. If _Chunk Size_ is odd, a single padding byte -- which MUST
|
||||
be `0` to conform with RIFF -- is added.
|
||||
|
||||
**Note:** RIFF has a convention that all-uppercase chunk FourCCs are standard
|
||||
**Note**: RIFF has a convention that all-uppercase chunk FourCCs are standard
|
||||
chunks that apply to any RIFF file format, while FourCCs specific to a file
|
||||
format are all lowercase. WebP does not follow this convention.
|
||||
|
||||
@ -220,7 +220,7 @@ use another conversion method, but visual results may differ among decoders.
|
||||
Simple File Format (Lossless)
|
||||
-----------------------------
|
||||
|
||||
**Note:** Older readers may not support files using the lossless format.
|
||||
**Note**: Older readers may not support files using the lossless format.
|
||||
|
||||
This layout SHOULD be used if the image requires _lossless_ encoding (with an
|
||||
optional transparency channel) and does not require advanced features provided
|
||||
@ -262,7 +262,7 @@ and height of the canvas.
|
||||
Extended File Format
|
||||
--------------------
|
||||
|
||||
**Note:** Older readers may not support files using the extended format.
|
||||
**Note**: Older readers may not support files using the extended format.
|
||||
|
||||
An extended format file consists of:
|
||||
|
||||
@ -290,12 +290,12 @@ up of:
|
||||
For an _animated image_, the _image data_ consists of multiple frames. More
|
||||
details about frames can be found in the [Animation](#animation) section.
|
||||
|
||||
All chunks necessary for reconstruction and color correction, that is 'VP8X',
|
||||
'ICCP', 'ANIM', 'ANMF', 'ALPH', 'VP8 ' and 'VP8L', MUST appear in the order
|
||||
All chunks necessary for reconstruction and color correction, that is, 'VP8X',
|
||||
'ICCP', 'ANIM', 'ANMF', 'ALPH', 'VP8 ', and 'VP8L', MUST appear in the order
|
||||
described earlier. Readers SHOULD fail when chunks necessary for reconstruction
|
||||
and color correction are out of order.
|
||||
|
||||
[Metadata](#metadata) and [unknown](#unknown-chunks) chunks MAY appear out of
|
||||
[Metadata](#metadata) and [unknown chunks](#unknown-chunks) MAY appear out of
|
||||
order.
|
||||
|
||||
**Rationale:** The chunks necessary for reconstruction should appear first in
|
||||
@ -401,7 +401,7 @@ Background Color: 32 bits (_uint32_)
|
||||
around the frames, as well as the transparent pixels of the first frame.
|
||||
The background color is also used when the Disposal method is `1`.
|
||||
|
||||
**Note**:
|
||||
**Notes**:
|
||||
|
||||
* The background color MAY contain a non-opaque alpha value, even if the
|
||||
_Alpha_ flag in the ['VP8X' Chunk](#extended_header) is unset.
|
||||
@ -525,7 +525,7 @@ Disposal method (D): 1 bit
|
||||
not present, standard RGB (sRGB) is to be assumed. (Note that sRGB also
|
||||
needs to be linearized due to a gamma of ~2.2.)
|
||||
|
||||
Frame Data: _Chunk Size_ - `16` bytes
|
||||
Frame Data: _Chunk Size_ bytes - `16`
|
||||
|
||||
: Consists of:
|
||||
|
||||
@ -616,7 +616,7 @@ Compression method (C): 2 bits
|
||||
* `0`: No compression.
|
||||
* `1`: Compressed using the WebP lossless format.
|
||||
|
||||
Alpha bitstream: _Chunk Size_ - `1` bytes
|
||||
Alpha bitstream: _Chunk Size_ bytes - `1`
|
||||
|
||||
: Encoded alpha bitstream.
|
||||
|
||||
|
@ -436,8 +436,8 @@ should be interpreted as an 8-bit two's complement number (that is: uint8 range
|
||||
|
||||
The multiplication is to be done using more precision (with at least 16-bit
|
||||
precision). The sign extension property of the shift operation does not matter
|
||||
here; only the lowest 8 bits are used from the result, and there the sign
|
||||
extension shifting and unsigned shifting are consistent with each other.
|
||||
here; only the lowest 8 bits are used from the result, and in these bits, the
|
||||
sign extension shifting and unsigned shifting are consistent with each other.
|
||||
|
||||
Now, we describe the contents of color transform data so that decoding can apply
|
||||
the inverse color transform and recover the original red and blue values. The
|
||||
@ -613,8 +613,8 @@ We use image data in five different roles:
|
||||
1. Color transform image: Created by `ColorTransformElement` values
|
||||
(defined in ["Color Transform"](#color-transform)) for different blocks of
|
||||
the image.
|
||||
1. Color indexing image: An array of size `color_table_size` (up to 256 ARGB
|
||||
values) storing the metadata for the color indexing transform (see
|
||||
1. Color indexing image: An array of the size of `color_table_size` (up to
|
||||
256 ARGB values) that stores metadata for the color indexing transform (see
|
||||
["Color Indexing Transform"](#color-indexing-transform)).
|
||||
|
||||
### 5.2 Encoding of Image Data
|
||||
|
@ -67,7 +67,7 @@ dwebp_LDADD += ../src/libwebp.la
|
||||
dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)
|
||||
|
||||
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES)
|
||||
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES) -I$(top_srcdir)
|
||||
gif2webp_LDADD =
|
||||
gif2webp_LDADD += libexample_util.la
|
||||
gif2webp_LDADD += ../imageio/libimageio_util.la
|
||||
|
@ -698,7 +698,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc == 1) {
|
||||
HelpShort();
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
|
@ -28,6 +28,7 @@
|
||||
#endif
|
||||
|
||||
#include <gif_lib.h>
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
#include "webp/encode.h"
|
||||
#include "webp/mux.h"
|
||||
#include "../examples/example_util.h"
|
||||
@ -70,8 +71,14 @@ static void Help(void) {
|
||||
printf(" -lossy ................. encode image using lossy compression\n");
|
||||
printf(" -mixed ................. for each frame in the image, pick lossy\n"
|
||||
" or lossless compression heuristically\n");
|
||||
printf(" -near_lossless <int> ... use near-lossless image preprocessing\n"
|
||||
" (0..100=off), default=100\n");
|
||||
printf(" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
|
||||
"conversion\n"
|
||||
" (lossy only)\n");
|
||||
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
|
||||
printf(" -m <int> ............... compression method (0=fast, 6=slowest)\n");
|
||||
printf(" -m <int> ............... compression method (0=fast, 6=slowest), "
|
||||
"default=4\n");
|
||||
printf(" -min_size .............. minimize output size (default:off)\n"
|
||||
" lossless compression by default; can be\n"
|
||||
" combined with -q, -m, -lossy or -mixed\n"
|
||||
@ -151,7 +158,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc == 1) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
@ -166,6 +173,10 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-mixed")) {
|
||||
enc_options.allow_mixed = 1;
|
||||
config.lossless = 0;
|
||||
} else if (!strcmp(argv[c], "-near_lossless") && c < argc - 1) {
|
||||
config.near_lossless = ExUtilGetInt(argv[++c], 0, &parse_error);
|
||||
} else if (!strcmp(argv[c], "-sharp_yuv")) {
|
||||
config.use_sharp_yuv = 1;
|
||||
} else if (!strcmp(argv[c], "-loop_compatibility")) {
|
||||
loop_compatibility = 1;
|
||||
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
|
||||
@ -226,10 +237,13 @@ int main(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
const int enc_version = WebPGetEncoderVersion();
|
||||
const int mux_version = WebPGetMuxVersion();
|
||||
const int sharpyuv_version = SharpYuvGetVersion();
|
||||
printf("WebP Encoder version: %d.%d.%d\nWebP Mux version: %d.%d.%d\n",
|
||||
(enc_version >> 16) & 0xff, (enc_version >> 8) & 0xff,
|
||||
enc_version & 0xff, (mux_version >> 16) & 0xff,
|
||||
(mux_version >> 8) & 0xff, mux_version & 0xff);
|
||||
printf("libsharpyuv: %d.%d.%d\n", (sharpyuv_version >> 24) & 0xff,
|
||||
(sharpyuv_version >> 16) & 0xffff, sharpyuv_version & 0xff);
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
} else if (!strcmp(argv[c], "-quiet")) {
|
||||
quiet = 1;
|
||||
|
@ -59,10 +59,15 @@ static void Help(void) {
|
||||
|
||||
printf("Per-frame options (only used for subsequent images input):\n");
|
||||
printf(" -d <int> ............. frame duration in ms (default: 100)\n");
|
||||
printf(" -lossless ........... use lossless mode (default)\n");
|
||||
printf(" -lossy ... ........... use lossy mode\n");
|
||||
printf(" -lossless ............ use lossless mode (default)\n");
|
||||
printf(" -lossy ............... use lossy mode\n");
|
||||
printf(" -q <float> ........... quality\n");
|
||||
printf(" -m <int> ............. method to use\n");
|
||||
printf(" -m <int> ............. compression method (0=fast, 6=slowest), "
|
||||
"default=4\n");
|
||||
printf(" -exact, -noexact ..... preserve or alter RGB values in transparent "
|
||||
"area\n"
|
||||
" (default: -noexact, may cause artifacts\n"
|
||||
" with lossy animations)\n");
|
||||
|
||||
printf("\n");
|
||||
printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
|
||||
@ -146,6 +151,7 @@ int main(int argc, const char* argv[]) {
|
||||
WebPData webp_data;
|
||||
int c;
|
||||
int have_input = 0;
|
||||
int last_input_index = 0;
|
||||
CommandLineArguments cmd_args;
|
||||
int ok;
|
||||
|
||||
@ -224,6 +230,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
if (!have_input) {
|
||||
fprintf(stderr, "No input file(s) for generating animation!\n");
|
||||
ok = 0;
|
||||
Help();
|
||||
goto End;
|
||||
}
|
||||
|
||||
@ -248,6 +256,10 @@ int main(int argc, const char* argv[]) {
|
||||
fprintf(stderr, "Invalid negative duration (%d)\n", duration);
|
||||
parse_error = 1;
|
||||
}
|
||||
} else if (!strcmp(argv[c], "-exact")) {
|
||||
config.exact = 1;
|
||||
} else if (!strcmp(argv[c], "-noexact")) {
|
||||
config.exact = 0;
|
||||
} else {
|
||||
parse_error = 1; // shouldn't be here.
|
||||
fprintf(stderr, "Unknown option [%s]\n", argv[c]);
|
||||
@ -268,6 +280,7 @@ int main(int argc, const char* argv[]) {
|
||||
// read next input image
|
||||
pic.use_argb = 1;
|
||||
ok = ReadImage((const char*)GET_WARGV_SHIFTED(argv, c), &pic);
|
||||
last_input_index = c;
|
||||
if (!ok) goto End;
|
||||
|
||||
if (enc == NULL) {
|
||||
@ -306,6 +319,13 @@ int main(int argc, const char* argv[]) {
|
||||
++pic_num;
|
||||
}
|
||||
|
||||
for (c = last_input_index + 1; c < argc; ++c) {
|
||||
if (argv[c] != NULL) {
|
||||
fprintf(stderr, "Warning: unused option [%s]!"
|
||||
" Frame options go before the input frame.\n", argv[c]);
|
||||
}
|
||||
}
|
||||
|
||||
// add a last fake frame to signal the last duration
|
||||
ok = ok && WebPAnimEncoderAdd(enc, NULL, timestamp_ms, NULL);
|
||||
ok = ok && WebPAnimEncoderAssemble(enc, &webp_data);
|
||||
|
@ -568,7 +568,7 @@ int main(int argc, char* argv[]) {
|
||||
if (kParams.file_name == NULL) {
|
||||
printf("missing input file!!\n");
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (!ImgIoUtilReadFile(kParams.file_name,
|
||||
|
@ -1132,7 +1132,7 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
if (argc == 1) {
|
||||
Help();
|
||||
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
|
||||
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Parse command-line input.
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "webp/types.h"
|
||||
|
||||
#define XTRA_MAJ_VERSION 1
|
||||
#define XTRA_MIN_VERSION 4
|
||||
#define XTRA_MIN_VERSION 5
|
||||
#define XTRA_REV_VERSION 0
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -57,6 +57,12 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
INIT_WARGV(argc, argv);
|
||||
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "Usage: %s [-h] image.webp [more_files.webp...]\n",
|
||||
argv[0]);
|
||||
goto Error;
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
const char* file = NULL;
|
||||
const uint8_t* webp = NULL;
|
||||
|
@ -53,7 +53,7 @@ DEMUXLIBLIST=''
|
||||
if [[ -z "${SDK}" ]]; then
|
||||
echo "iOS SDK not available"
|
||||
exit 1
|
||||
elif [[ ${SDK%%.*} -gt 8 ]]; then
|
||||
elif [[ ${SDK%%.*} -gt 8 && "${XCODE%%.*}" -lt 16 ]]; then
|
||||
EXTRA_CFLAGS="-fembed-bitcode"
|
||||
elif [[ ${SDK%%.*} -le 6 ]]; then
|
||||
echo "You need iOS SDK version 6.0 or above"
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH CWEBP 1 "July 18, 2024"
|
||||
.TH CWEBP 1 "September 17, 2024"
|
||||
.SH NAME
|
||||
cwebp \- compress an image file to a WebP file
|
||||
.SH SYNOPSIS
|
||||
@ -180,8 +180,8 @@ Disable strong filtering (if filtering is being used thanks to the
|
||||
\fB\-f\fP option) and use simple filtering instead.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion if needed. Note that this
|
||||
process is slower than the default 'fast' RGB->YUV conversion.
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-sns " int
|
||||
Specify the amplitude of the spatial noise shaping. Spatial noise shaping
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH GIF2WEBP 1 "July 18, 2024"
|
||||
.TH GIF2WEBP 1 "November 4, 2024"
|
||||
.SH NAME
|
||||
gif2webp \- Convert a GIF image to WebP
|
||||
.SH SYNOPSIS
|
||||
@ -39,6 +39,18 @@ Encode the image using lossy compression.
|
||||
Mixed compression mode: optimize compression of the image by picking either
|
||||
lossy or lossless compression for each frame heuristically.
|
||||
.TP
|
||||
.BI \-near_lossless " int
|
||||
Specify the level of near\-lossless image preprocessing. This option adjusts
|
||||
pixel values to help compressibility, but has minimal impact on the visual
|
||||
quality. It triggers lossless compression mode automatically. The range is 0
|
||||
(maximum preprocessing) to 100 (no preprocessing, the default). The typical
|
||||
value is around 60. Note that lossy with \fB\-q 100\fP can at times yield
|
||||
better results.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-q " float
|
||||
Specify the compression factor for RGB channels between 0 and 100. The default
|
||||
is 75.
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH IMG2WEBP 1 "July 18, 2024"
|
||||
.TH IMG2WEBP 1 "November 26, 2024"
|
||||
.SH NAME
|
||||
img2webp \- create animated WebP file from a sequence of input images.
|
||||
.SH SYNOPSIS
|
||||
@ -53,8 +53,8 @@ value is around 60. Note that lossy with \fB\-q 100\fP can at times yield
|
||||
better results.
|
||||
.TP
|
||||
.B \-sharp_yuv
|
||||
Use more accurate and sharper RGB->YUV conversion if needed. Note that this
|
||||
process is slower than the default 'fast' RGB->YUV conversion.
|
||||
Use more accurate and sharper RGB->YUV conversion. Note that this process is
|
||||
slower than the default 'fast' RGB->YUV conversion.
|
||||
.TP
|
||||
.BI \-loop " int
|
||||
Specifies the number of times the animation should loop. Using '0'
|
||||
@ -88,6 +88,15 @@ Specify the compression factor between 0 and 100. The default is 75.
|
||||
Specify the compression method to use. This parameter controls the
|
||||
trade off between encoding speed and the compressed file size and quality.
|
||||
Possible values range from 0 to 6. Default value is 4.
|
||||
When higher values are used, the encoder will spend more time inspecting
|
||||
additional encoding possibilities and decide on the quality gain.
|
||||
Lower value can result in faster processing time at the expense of
|
||||
larger file size and lower compression quality.
|
||||
.TP
|
||||
.B \-exact, \-noexact
|
||||
Preserve or alter RGB values in transparent area. The default is
|
||||
\fB-noexact\fP, to help compressibility. Note \fB\-noexact\fP may cause
|
||||
artifacts in frames compressed with \fB\-lossy\fP.
|
||||
|
||||
.SH EXIT STATUS
|
||||
If there were no problems during execution, \fBimg2webp\fP exits with the value
|
||||
|
@ -33,7 +33,7 @@ libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
|
||||
|
||||
libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
libsharpyuv_la_LDFLAGS = -no-undefined -version-info 1:0:1 -lm
|
||||
libsharpyuv_la_LDFLAGS = -no-undefined -version-info 1:1:1 -lm
|
||||
libsharpyuv_la_LIBADD =
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_sse2.la
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_neon.la
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 0,0,4,0
|
||||
PRODUCTVERSION 0,0,4,0
|
||||
FILEVERSION 0,0,4,1
|
||||
PRODUCTVERSION 0,0,4,1
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libsharpyuv DLL"
|
||||
VALUE "FileVersion", "0.4.0"
|
||||
VALUE "FileVersion", "0.4.1"
|
||||
VALUE "InternalName", "libsharpyuv.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2024"
|
||||
VALUE "OriginalFilename", "libsharpyuv.dll"
|
||||
VALUE "ProductName", "SharpYuv Library"
|
||||
VALUE "ProductVersion", "0.4.0"
|
||||
VALUE "ProductVersion", "0.4.1"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -52,7 +52,7 @@ extern "C" {
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 4
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
#define SHARPYUV_VERSION_PATCH 1
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
@ -66,10 +66,17 @@ extern "C" {
|
||||
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
// y_ = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u_ = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v_ = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then the values are divided by 1<<16 and rounded.
|
||||
// y = (y_ + (1 << 15)) >> 16
|
||||
// u = (u_ + (1 << 15)) >> 16
|
||||
// v = (v_ + (1 << 15)) >> 16
|
||||
//
|
||||
// Typically, the offset values rgb_to_y[3], rgb_to_u[3] and rgb_to_v[3] depend
|
||||
// on the input's bit depth, e.g., rgb_to_u[3] = 1 << (rgb_bit_depth - 1 + 16).
|
||||
// See also sharpyuv_csp.h to get a predefined matrix or generate a matrix.
|
||||
typedef struct {
|
||||
int rgb_to_y[4];
|
||||
int rgb_to_u[4];
|
||||
@ -127,6 +134,8 @@ typedef enum SharpYuvTransferFunctionType {
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
// yuv_matrix: RGB to YUV conversion matrix. The matrix values typically
|
||||
// depend on the input's rgb_bit_depth.
|
||||
// This function calls SharpYuvConvertWithOptions with a default transfer
|
||||
// function of kSharpYuvTransferFunctionSrgb.
|
||||
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
|
@ -22,16 +22,16 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
const float cb = 0.5f / (1.0f - kb);
|
||||
const float cr = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float scale_u = cb;
|
||||
float scale_v = cr;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
@ -59,31 +59,35 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
}
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix
|
||||
// Derived using the following formulas:
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
|
@ -41,10 +41,15 @@ SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
// WebP's matrix, similar but not identical to kSharpYuvMatrixRec601Limited
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec601Full,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
@ -36,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
|
||||
# other than the ones listed on the command line, i.e., after linking, it will
|
||||
# not have unresolved symbols. Some platforms (Windows among them) require all
|
||||
# symbols in shared libraries to be resolved at library creation.
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 8:9:1
|
||||
libwebp_la_LDFLAGS = -no-undefined -version-info 8:10:1
|
||||
libwebpincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebp.pc
|
||||
|
||||
@ -48,7 +48,7 @@ if BUILD_LIBWEBPDECODER
|
||||
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
|
||||
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
|
||||
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:9:1
|
||||
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 4:10:1
|
||||
pkgconfig_DATA += libwebpdecoder.pc
|
||||
endif
|
||||
|
||||
|
@ -32,7 +32,7 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 4
|
||||
#define DEC_MIN_VERSION 5
|
||||
#define DEC_REV_VERSION 0
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
|
@ -13,6 +13,6 @@ noinst_HEADERS =
|
||||
noinst_HEADERS += ../webp/format_constants.h
|
||||
|
||||
libwebpdemux_la_LIBADD = ../libwebp.la
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:15:0
|
||||
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:16:0
|
||||
libwebpdemuxincludedir = $(includedir)/webp
|
||||
pkgconfig_DATA = libwebpdemux.pc
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 4
|
||||
#define DMUX_MIN_VERSION 5
|
||||
#define DMUX_REV_VERSION 0
|
||||
|
||||
typedef struct {
|
||||
|
@ -6,8 +6,8 @@
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 1,0,4,0
|
||||
PRODUCTVERSION 1,0,4,0
|
||||
FILEVERSION 1,0,5,0
|
||||
PRODUCTVERSION 1,0,5,0
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
@ -24,12 +24,12 @@ BEGIN
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libwebpdemux DLL"
|
||||
VALUE "FileVersion", "1.4.0"
|
||||
VALUE "FileVersion", "1.5.0"
|
||||
VALUE "InternalName", "libwebpdemux.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2024"
|
||||
VALUE "OriginalFilename", "libwebpdemux.dll"
|
||||
VALUE "ProductName", "WebP Image Demuxer"
|
||||
VALUE "ProductVersion", "1.4.0"
|
||||
VALUE "ProductVersion", "1.5.0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_C(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -19,8 +19,8 @@
|
||||
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16 };
|
||||
|
||||
static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16x8_t minus_one = vdupq_n_s16(-1);
|
||||
const int16x8_t coeffs_0 = vld1q_s16(coeffs);
|
||||
const int16x8_t coeffs_1 = vld1q_s16(coeffs + 8);
|
||||
|
@ -22,8 +22,8 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
|
@ -38,7 +38,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -82,7 +83,8 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -95,7 +97,8 @@ static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
@ -103,13 +106,15 @@ static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -120,7 +125,8 @@ static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -133,7 +139,8 @@ static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
// Paragraph 14.3
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -161,7 +168,7 @@ static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
VP8WHT VP8TransformWHT;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Intra predictions
|
||||
@ -661,32 +668,32 @@ static void HFilter16i_C(uint8_t* p, int stride,
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -694,8 +701,8 @@ static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -730,8 +737,8 @@ VP8SimpleFilterFunc VP8SimpleHFilter16;
|
||||
VP8SimpleFilterFunc VP8SimpleVFilter16i;
|
||||
VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
@ -133,26 +133,26 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -215,7 +215,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14;
@ -532,7 +533,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
);
}

static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);

@ -21,7 +21,8 @@
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
static const int kC2 = WEBP_TRANSFORM_AC3_C2;

static void TransformDC(const int16_t* in, uint8_t* dst) {
static void TransformDC(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;

__asm__ volatile (
@ -45,7 +46,8 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
);
}

static void TransformAC3(const int16_t* in, uint8_t* dst) {
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int a = in[0] + 4;
int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
@ -81,7 +83,8 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
);
}

static void TransformOne(const int16_t* in, uint8_t* dst) {
static void TransformOne(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;

@ -148,7 +151,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
);
}

static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
@ -434,14 +438,14 @@ static void HFilter16(uint8_t* p, int stride,
}

// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}

static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
@ -465,14 +469,14 @@ static void HFilter16i(uint8_t* p, int stride,
}
}

static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}

static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}

@ -38,7 +38,8 @@
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}

static void TransformOne(const int16_t* in, uint8_t* dst) {
static void TransformOne(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
@ -65,14 +66,16 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}

static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}

static void TransformWHT(const int16_t* in, int16_t* out) {
static void TransformWHT(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
v8i16 input0, input1;
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
@ -114,13 +117,15 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
out[240] = __msa_copy_s_h(out1, 7);
}

static void TransformDC(const int16_t* in, uint8_t* dst) {
static void TransformDC(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int DC = (in[0] + 4) >> 3;
const v8i16 tmp0 = __msa_fill_h(DC);
ADDBLK_ST4x4_UB(tmp0, tmp0, tmp0, tmp0, dst, BPS);
}

static void TransformAC3(const int16_t* in, uint8_t* dst) {
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int a = in[0] + 4;
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
@ -475,8 +480,8 @@ static void HFilter16i(uint8_t* src_y, int stride,
}

// 8-pixels wide variants, for chroma filtering
static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
int stride, int b_limit_in, int limit_in, int thresh_in) {
uint8_t* ptmp_src_u = src_u - 4 * stride;
uint8_t* ptmp_src_v = src_v - 4 * stride;
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
@ -520,8 +525,8 @@ static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
SD(q2_d, ptmp_src_v);
}

static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
int stride, int b_limit_in, int limit_in, int thresh_in) {
uint8_t* ptmp_src_u = src_u - 4;
uint8_t* ptmp_src_v = src_v - 4;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
@ -556,7 +561,8 @@ static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
ST6x4_UB(tmp7, 0, tmp5, 4, ptmp_src_v, stride);
}

static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
uint8_t* WEBP_RESTRICT src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
uint64_t p1_d, p0_d, q0_d, q1_d;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
@ -587,7 +593,8 @@ static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
SD4(q1_d, q0_d, p0_d, p1_d, src_v, -stride);
}

static void HFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
uint8_t* WEBP_RESTRICT src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;

@ -916,8 +916,8 @@ static void HFilter16i_NEON(uint8_t* p, int stride,
#endif // !WORK_AROUND_GCC

// 8-pixels wide variant, for chroma filtering
static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
@ -932,7 +932,8 @@ static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
}
}
static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
u += 4 * stride;
@ -949,8 +950,8 @@ static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
}

#if !defined(WORK_AROUND_GCC)
static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
@ -964,7 +965,8 @@ static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
}
}

static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
u += 4;
@ -1041,7 +1043,8 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
Transpose8x2_NEON(E0, E1, rows);
}

static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int16x8x2_t rows;
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
TransformPass_NEON(&rows);
@ -1051,7 +1054,8 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {

#else

static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int kBPS = BPS;
// kC1, kC2. Padded because vld1.16 loads 8 bytes
const int16_t constants[4] = { kC1, kC2, 0, 0 };
@ -1184,14 +1188,16 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {

#endif // WEBP_USE_INTRINSICS

static void TransformTwo_NEON(const int16_t* in, uint8_t* dst, int do_two) {
static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
TransformOne_NEON(in, dst);
if (do_two) {
TransformOne_NEON(in + 16, dst + 4);
}
}

static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int16x8_t DC = vdupq_n_s16(in[0]);
Add4x4_NEON(DC, DC, dst);
}
@ -1205,7 +1211,8 @@ static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
*dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
} while (0)

static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
int32x4x4_t tmp;

{
@ -1256,7 +1263,8 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) {

//------------------------------------------------------------------------------

static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int16x4_t A = vld1_dup_s16(in);
const int16x4_t c4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));

@ -30,7 +30,8 @@
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)

static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@ -197,7 +198,8 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {

#if (USE_TRANSFORM_AC3 == 1)

static void TransformAC3_SSE2(const int16_t* in, uint8_t* dst) {
static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const __m128i A = _mm_set1_epi16(in[0] + 4);
const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
@ -792,8 +794,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
}

// 8-pixels wide variant, for chroma filtering
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, p2, p1, p0, q0, q1, q2;

@ -817,8 +819,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
STOREUV(q2, u, v, 2 * stride);
}

static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride, int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i p3, p2, p1, p0, q0, q1, q2, q3;

@ -837,7 +839,8 @@ static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
}

static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
@ -863,7 +866,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
STOREUV(q1, u, v, 1 * stride);
}

static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
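The hunks above (the MIPS dspR2, MSA, NEON and SSE2 decoder files) all make the same mechanical change: every coefficient/pixel pointer pair in the inverse transforms and loop filters gains a WEBP_RESTRICT qualifier. As a minimal sketch of what that qualifier buys, assuming WEBP_RESTRICT expands to a restrict-style keyword on compilers that support it (MY_RESTRICT and AddResidual4x4 below are illustrative names, not libwebp code):

  #include <stdint.h>

  #if defined(__GNUC__) || defined(__clang__)
  #define MY_RESTRICT __restrict   /* assumption: GCC/Clang-style keyword */
  #else
  #define MY_RESTRICT
  #endif

  /* Add a 4x4 block of residuals to a destination block. Because 'in' and
   * 'dst' are declared non-aliasing, the compiler may keep all 16 residuals
   * in registers and reorder the byte stores freely. */
  static void AddResidual4x4(const int16_t* MY_RESTRICT in,
                             uint8_t* MY_RESTRICT dst, int stride) {
    int x, y;
    for (y = 0; y < 4; ++y) {
      for (x = 0; x < 4; ++x) {
        const int v = dst[x] + in[x];
        dst[x] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
      }
      in += 4;
      dst += stride;
    }
  }

Passing overlapping buffers to such a function would be undefined behavior, which is why the qualifier is only applied where the callers already guarantee distinct coefficient and pixel buffers.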
153 src/dsp/dsp.h
@ -60,53 +60,66 @@ extern "C" {
// Transforms
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
typedef void (*VP8Idct)(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two);
typedef void (*VP8Fdct)(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out);
typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top and *left can be NULL.
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
typedef void (*VP8IntraPreds)(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top);
typedef void (*VP8Intra4Preds)(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
typedef int (*VP8Metric)(const uint8_t* WEBP_RESTRICT pix,
const uint8_t* WEBP_RESTRICT ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
const uint16_t* const weights);
typedef int (*VP8WMetric)(const uint8_t* WEBP_RESTRICT pix,
const uint8_t* WEBP_RESTRICT ref,
const uint16_t* WEBP_RESTRICT const weights);
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
// 4 by 4 symmetric matrix.
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

// Compute the average (DC) of four 4x4 blocks.
// Each sub-4x4 block #i sum is stored in dc[i].
typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
uint32_t dc[4]);
extern VP8MeanMetric VP8Mean16x4;

typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
uint8_t* WEBP_RESTRICT dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy16x8;
// Quantization
struct VP8Matrix; // forward declaration
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
typedef int (*VP8QuantizeBlock)(
int16_t in[16], int16_t out[16],
const struct VP8Matrix* WEBP_RESTRICT const mtx);
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
const struct VP8Matrix* const mtx);
typedef int (*VP8Quantize2Blocks)(
int16_t in[32], int16_t out[32],
const struct VP8Matrix* WEBP_RESTRICT const mtx);

extern VP8QuantizeBlock VP8EncQuantizeBlock;
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;

// specific to 2nd transform:
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
typedef int (*VP8QuantizeBlockWHT)(
int16_t in[16], int16_t out[16],
const struct VP8Matrix* WEBP_RESTRICT const mtx);
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;

extern const int VP8DspScan[16 + 4 + 4];
@ -118,9 +131,10 @@ typedef struct {
int max_value;
int last_non_zero;
} VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
const uint8_t* WEBP_RESTRICT pred,
int start_block, int end_block,
VP8Histogram* const histo);
VP8Histogram* WEBP_RESTRICT const histo);
extern VP8CHisto VP8CollectHistogram;
// General-purpose util function to help VP8CollectHistogram().
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
@ -138,8 +152,9 @@ extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
extern const uint8_t VP8EncBands[16 + 1];

struct VP8Residual;
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
struct VP8Residual* const res);
typedef void (*VP8SetResidualCoeffsFunc)(
const int16_t* WEBP_RESTRICT const coeffs,
struct VP8Residual* WEBP_RESTRICT const res);
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;

// Cost calculation function.
@ -193,9 +208,11 @@ void VP8SSIMDspInit(void);
//------------------------------------------------------------------------------
// Decoding

typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
typedef void (*VP8DecIdct)(const int16_t* WEBP_RESTRICT coeffs,
uint8_t* WEBP_RESTRICT dst);
// when doing two transforms, coeffs is actually int16_t[2][16].
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
typedef void (*VP8DecIdct2)(const int16_t* WEBP_RESTRICT coeffs,
uint8_t* WEBP_RESTRICT dst, int do_two);
extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformAC3;
extern VP8DecIdct VP8TransformUV;
@ -233,7 +250,8 @@ extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
// regular filter (on both macroblock edges and inner edges)
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
int thresh, int ithresh, int hev_t);
typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int stride,
int thresh, int ithresh, int hev_t);
// on outer edge
extern VP8LumaFilterFunc VP8VFilter16;
@ -253,8 +271,8 @@ extern VP8ChromaFilterFunc VP8HFilter8i;
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
#define VP8_DITHER_AMP_BITS 7
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
extern void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
uint8_t* WEBP_RESTRICT dst, int dst_stride);

// must be called before anything using the above
void VP8DspInit(void);
@ -267,10 +285,10 @@ void VP8DspInit(void);
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
typedef void (*WebPUpsampleLinePairFunc)(
const uint8_t* top_y, const uint8_t* bottom_y,
const uint8_t* top_u, const uint8_t* top_v,
const uint8_t* cur_u, const uint8_t* cur_v,
uint8_t* top_dst, uint8_t* bottom_dst, int len);
const uint8_t* WEBP_RESTRICT top_y, const uint8_t* WEBP_RESTRICT bottom_y,
const uint8_t* WEBP_RESTRICT top_u, const uint8_t* WEBP_RESTRICT top_v,
const uint8_t* WEBP_RESTRICT cur_u, const uint8_t* WEBP_RESTRICT cur_v,
uint8_t* WEBP_RESTRICT top_dst, uint8_t* WEBP_RESTRICT bottom_dst, int len);

#ifdef FANCY_UPSAMPLING

@ -280,13 +298,15 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
#endif // FANCY_UPSAMPLING

// Per-row point-sampling methods.
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len);
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
const uint8_t* u, const uint8_t* v, int uv_stride,
uint8_t* dst, int dst_stride,
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v, int uv_stride,
uint8_t* WEBP_RESTRICT dst, int dst_stride,
int width, int height, WebPSamplerRowFunc func);

// Sampling functions to convert rows of YUV to RGB(A)
@ -298,9 +318,10 @@ extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);

// YUV444->RGB converters
typedef void (*WebPYUV444Converter)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
typedef void (*WebPYUV444Converter)(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len);

extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];

@ -316,26 +337,35 @@ void WebPInitYUV444Converters(void);
// ARGB -> YUV converters

// Convert ARGB samples to luma Y.
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT y, int width);
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
// even lines and '0' for odd ones. 'src_width' is the original width, not
// the U/V one.
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store);

// Convert a row of accumulated (four-values) of rgba32 toward U/V
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width);

// Convert RGB or BGR to Y
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
extern void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT y, int width);
extern void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
uint8_t* WEBP_RESTRICT y, int width);

// used for plain-C fallback.
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store);
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width);

// Must be called before using the above.
void WebPInitConvertARGBToYUV(void);
@ -348,8 +378,9 @@ struct WebPRescaler;
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
const uint8_t* src);
typedef void (*WebPRescalerImportRowFunc)(
struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);

extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
@ -362,16 +393,19 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;

// Plain-C implementation, as fall-back.
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowExpand_C(
struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);
extern void WebPRescalerImportRowShrink_C(
struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);

// Main entry calls:
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRow(
struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);
// Export one row (starting at x_out position) from rescaler.
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);

@ -480,8 +514,9 @@ typedef enum { // Filter types.
WEBP_FILTER_FAST
} WEBP_FILTER_TYPE;

typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int stride, uint8_t* out);
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
int width, int height, int stride,
uint8_t* WEBP_RESTRICT out);
// In-place un-filtering.
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
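src/dsp/dsp.h applies the same qualifier to the dispatch typedefs (VP8Idct, VP8Fdct, WebPSamplerRowFunc, WebPFilterFunc, ...), so the non-aliasing contract is stated once, at the point where each plain-C or SIMD implementation is plugged in; since top-level qualifiers on parameters do not change a function's type, the existing assignments keep compiling. A rough sketch of the pattern, using made-up names (RowCopyFunc, RowCopy_C, MY_RESTRICT) rather than the library's:

  #include <stdint.h>

  #define MY_RESTRICT __restrict  /* assumption: GCC/Clang-style keyword */

  /* Dispatch slot: the typedef itself carries the non-aliasing promise. */
  typedef void (*RowCopyFunc)(const uint8_t* MY_RESTRICT src,
                              uint8_t* MY_RESTRICT dst, int len);

  static void RowCopy_C(const uint8_t* MY_RESTRICT src,
                        uint8_t* MY_RESTRICT dst, int len) {
    int i;
    for (i = 0; i < len; ++i) dst[i] = src[i];  /* vectorizable: no aliasing */
  }

  /* Installed at init time, in the spirit of WebPSamplers[] / VP8ITransform. */
  static RowCopyFunc MyRowCopy = RowCopy_C;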
132 src/dsp/enc.c
@ -59,9 +59,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
}

#if !WEBP_NEON_OMIT_C_CODE
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
const uint8_t* WEBP_RESTRICT pred,
int start_block, int end_block,
VP8Histogram* const histo) {
VP8Histogram* WEBP_RESTRICT const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
@ -109,8 +110,9 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int C[4 * 4], *tmp;
int i;
tmp = C;
@ -146,7 +148,9 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
}
}

static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst,
int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
@ -154,7 +158,9 @@ static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
}
}

static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out) {
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
@ -184,14 +190,16 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
}
#endif // !WEBP_NEON_OMIT_C_CODE

static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
static void FTransform2_C(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out) {
VP8FTransform(src, ref, out);
VP8FTransform(src + 4, ref + 4, out + 16);
}

#if !WEBP_NEON_OMIT_C_CODE
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
// input is 12b signed
int32_t tmp[16];
int i;
@ -234,8 +242,9 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
}
}

static WEBP_INLINE void VerticalPred(uint8_t* dst,
const uint8_t* top, int size) {
static WEBP_INLINE void VerticalPred(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top,
int size) {
int j;
if (top != NULL) {
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
@ -244,8 +253,9 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
}
}

static WEBP_INLINE void HorizontalPred(uint8_t* dst,
const uint8_t* left, int size) {
static WEBP_INLINE void HorizontalPred(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
int size) {
if (left != NULL) {
int j;
for (j = 0; j < size; ++j) {
@ -256,8 +266,9 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
}
}

static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top, int size) {
int y;
if (left != NULL) {
if (top != NULL) {
@ -286,8 +297,9 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
}
}

static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
const uint8_t* top,
static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top,
int size, int round, int shift) {
int DC = 0;
int j;
@ -312,8 +324,9 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)

static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_C(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
// U block
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
VerticalPred(C8VE8 + dst, top, 8);
@ -333,8 +346,9 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
// luma 16x16 prediction (paragraph 12.3)

#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
static void Intra16Preds_C(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
VerticalPred(I16VE16 + dst, top, 16);
HorizontalPred(I16HE16 + dst, left, 16);
@ -345,13 +359,14 @@ static void Intra16Preds_C(uint8_t* dst,
//------------------------------------------------------------------------------
// luma 4x4 prediction

#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32

#define DST(x, y) dst[(x) + (y) * BPS]
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)

static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
// vertical
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
AVG3(top[ 0], top[1], top[2]),
@ -364,7 +379,8 @@ static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
}
}

static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
// horizontal
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -376,14 +392,14 @@ static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
}

static void DC4(uint8_t* dst, const uint8_t* top) {
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
Fill(dst, dc >> 3, 4);
}

static void RD4(uint8_t* dst, const uint8_t* top) {
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -402,7 +418,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
DST(3, 0) = AVG3(D, C, B);
}

static void LD4(uint8_t* dst, const uint8_t* top) {
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int A = top[0];
const int B = top[1];
const int C = top[2];
@ -420,7 +436,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
DST(3, 3) = AVG3(G, H, H);
}

static void VR4(uint8_t* dst, const uint8_t* top) {
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -442,7 +458,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
DST(3, 1) = AVG3(B, C, D);
}

static void VL4(uint8_t* dst, const uint8_t* top) {
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int A = top[0];
const int B = top[1];
const int C = top[2];
@ -464,7 +480,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
DST(3, 3) = AVG3(F, G, H);
}

static void HU4(uint8_t* dst, const uint8_t* top) {
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int I = top[-2];
const int J = top[-3];
const int K = top[-4];
@ -479,7 +495,7 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}

static void HD4(uint8_t* dst, const uint8_t* top) {
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -502,7 +518,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
DST(1, 3) = AVG3(L, K, J);
}

static void TM4(uint8_t* dst, const uint8_t* top) {
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int x, y;
const uint8_t* const clip = clip1 + 255 - top[-1];
for (y = 0; y < 4; ++y) {
@ -520,7 +536,8 @@ static void TM4(uint8_t* dst, const uint8_t* top) {

// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@ -533,13 +550,14 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
HU4(I4HU4 + dst, top);
}

#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32

//------------------------------------------------------------------------------
// Metric

#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b,
int w, int h) {
int count = 0;
int y, x;
@ -554,21 +572,25 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
return count;
}

static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_C(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
return GetSSE(a, b, 16, 16);
}
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_C(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
return GetSSE(a, b, 16, 8);
}
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_C(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
return GetSSE(a, b, 8, 8);
}
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_C(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
return GetSSE(a, b, 4, 4);
}
#endif // !WEBP_NEON_OMIT_C_CODE

static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
int k, x, y;
for (k = 0; k < 4; ++k) {
uint32_t avg = 0;
@ -592,7 +614,8 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* in, const uint16_t* w) {
static int TTransform(const uint8_t* WEBP_RESTRICT in,
const uint16_t* WEBP_RESTRICT w) {
int sum = 0;
int tmp[16];
int i;
@ -626,15 +649,17 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
return sum;
}

static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_C(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
return abs(sum2 - sum1) >> 5;
}

static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
@ -657,7 +682,7 @@ static const uint8_t kZigzag[16] = {

// Simple quantization
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
const VP8Matrix* WEBP_RESTRICT const mtx) {
int last = -1;
int n;
for (n = 0; n < 16; ++n) {
@ -683,7 +708,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
}

static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
@ -694,7 +719,8 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
//------------------------------------------------------------------------------
// Block copy

static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
static WEBP_INLINE void Copy(const uint8_t* WEBP_RESTRICT src,
uint8_t* WEBP_RESTRICT dst, int w, int h) {
int y;
for (y = 0; y < h; ++y) {
memcpy(dst, src, w);
@ -703,11 +729,13 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
}
}

static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
static void Copy4x4_C(const uint8_t* WEBP_RESTRICT src,
uint8_t* WEBP_RESTRICT dst) {
Copy(src, dst, 4, 4);
}

static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
static void Copy16x8_C(const uint8_t* WEBP_RESTRICT src,
uint8_t* WEBP_RESTRICT dst) {
Copy(src, dst, 16, 8);
}

@ -769,8 +797,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
#endif

#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
VP8EncPredLuma4 = Intra4Preds_C;
#endif
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
VP8EncPredLuma16 = Intra16Preds_C;
#endif
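In src/dsp/enc.c the annotation lands on the plain-C predictors, metrics and copy helpers (GetSSE, SSE16x16_C, Copy4x4_C, ...), and the luma-4x4 guard gains a BPS != 32 clause, apparently so the C predictors remain compiled when the block stride is not the default 32. A self-contained sketch of a restrict-qualified distortion metric in the same spirit (BlockSSE and MY_RESTRICT are illustrative names, not libwebp's):

  #include <stdint.h>

  #define MY_RESTRICT __restrict  /* assumption: GCC/Clang-style keyword */

  /* Sum of squared differences over a w x h block. With both pointers marked
   * non-aliasing, each row of 'a' and 'b' can be loaded and widened
   * independently, which helps auto-vectorization of the inner loop. */
  static int BlockSSE(const uint8_t* MY_RESTRICT a,
                      const uint8_t* MY_RESTRICT b,
                      int stride, int w, int h) {
    int count = 0;
    int x, y;
    for (y = 0; y < h; ++y) {
      for (x = 0; x < w; ++x) {
        const int diff = (int)a[x] - (int)b[x];
        count += diff * diff;
      }
      a += stride;
      b += stride;
    }
    return count;
  }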
@ -109,9 +109,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"

// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
const int16_t* in,
uint8_t* dst) {
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
@ -141,8 +141,9 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
);
}

static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
ITransformOne_MIPS32(ref, in, dst);
if (do_two) {
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
@ -236,7 +237,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
}

static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
@ -358,8 +359,9 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
"msub %[temp6], %[temp0] \n\t" \
"msub %[temp7], %[temp1] \n\t"

static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int tmp[32];
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;

@ -393,8 +395,9 @@ static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS

static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
@ -475,8 +478,9 @@ static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"

static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
int temp17, temp18, temp19, temp20;
@ -537,7 +541,8 @@ static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
GET_SSE_INNER(D, D + 1, D + 2, D + 3)

static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@ -571,7 +576,8 @@ static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@ -597,7 +603,8 @@ static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

@ -619,7 +626,8 @@ static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@ -141,8 +141,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"

static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out) {
const int c2217 = 2217;
const int c5352 = 5352;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@ -171,8 +172,9 @@ static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS

static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;

@ -239,16 +241,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
);
}

static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}

static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;

@ -314,9 +318,9 @@ static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
return abs(temp3 - temp17) >> 5;
}

static int Disto16x16_MIPSdspR2(const uint8_t* const a,
const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
@ -367,8 +371,8 @@ static int Disto16x16_MIPSdspR2(const uint8_t* const a,
} while (0)

#define VERTICAL_PRED(DST, TOP, SIZE) \
static WEBP_INLINE void VerticalPred##SIZE(uint8_t* (DST), \
const uint8_t* (TOP)) { \
static WEBP_INLINE void VerticalPred##SIZE( \
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) { \
int j; \
if ((TOP)) { \
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
@ -383,8 +387,8 @@ VERTICAL_PRED(dst, top, 16)
#undef VERTICAL_PRED

#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
static WEBP_INLINE void HorizontalPred##SIZE(uint8_t* (DST), \
const uint8_t* (LEFT)) { \
static WEBP_INLINE void HorizontalPred##SIZE( \
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) { \
if (LEFT) { \
int j; \
for (j = 0; j < (SIZE); ++j) { \
@ -451,8 +455,9 @@ HORIZONTAL_PRED(dst, left, 16)
} while (0)

#define TRUE_MOTION(DST, LEFT, TOP, SIZE) \
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* (DST), const uint8_t* (LEFT),\
const uint8_t* (TOP)) { \
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
const uint8_t* WEBP_RESTRICT (LEFT), \
const uint8_t* WEBP_RESTRICT (TOP)) { \
if ((LEFT) != NULL) { \
if ((TOP) != NULL) { \
CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
@ -480,8 +485,9 @@ TRUE_MOTION(dst, left, top, 16)
#undef CLIP_8B_TO_DST
#undef CLIPPING

static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DCMode16(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
int DC, DC1;
int temp0, temp1, temp2, temp3;

@ -543,8 +549,9 @@ static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
FILL_8_OR_16(dst, DC, 16);
}

static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
int DC, DC1;
int temp0, temp1, temp2, temp3;

@ -588,7 +595,7 @@ static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
FILL_8_OR_16(dst, DC, 8);
}

static void DC4(uint8_t* dst, const uint8_t* top) {
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1;
__asm__ volatile(
"ulw %[temp0], 0(%[top]) \n\t"
@ -609,7 +616,7 @@ static void DC4(uint8_t* dst, const uint8_t* top) {
);
}

static void TM4(uint8_t* dst, const uint8_t* top) {
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
const int c35 = 0xff00ff;
__asm__ volatile (
@ -664,7 +671,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
);
}

static void VE4(uint8_t* dst, const uint8_t* top) {
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
__asm__ volatile(
"ulw %[temp0], -1(%[top]) \n\t"
@ -695,7 +702,7 @@ static void VE4(uint8_t* dst, const uint8_t* top) {
);
}

static void HE4(uint8_t* dst, const uint8_t* top) {
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
__asm__ volatile(
"ulw %[temp0], -4(%[top]) \n\t"
@ -731,7 +738,7 @@ static void HE4(uint8_t* dst, const uint8_t* top) {
);
}

static void RD4(uint8_t* dst, const uint8_t* top) {
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5;
int temp6, temp7, temp8, temp9, temp10, temp11;
__asm__ volatile(
@ -780,7 +787,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
);
}

static void VR4(uint8_t* dst, const uint8_t* top) {
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
@ -830,7 +837,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
);
}

static void LD4(uint8_t* dst, const uint8_t* top) {
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5;
int temp6, temp7, temp8, temp9, temp10, temp11;
__asm__ volatile(
@ -877,7 +884,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
);
}

static void VL4(uint8_t* dst, const uint8_t* top) {
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
@ -926,7 +933,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
);
}

static void HD4(uint8_t* dst, const uint8_t* top) {
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
@ -974,7 +981,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
);
}

static void HU4(uint8_t* dst, const uint8_t* top) {
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile (
"ulw %[temp0], -5(%[top]) \n\t"
@ -1013,8 +1020,9 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)

static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
// U block
DCMode8(C8DC8 + dst, left, top);
VerticalPred8(C8VE8 + dst, top);
@ -1033,8 +1041,9 @@ static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)

static void Intra16Preds_MIPSdspR2(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
static void Intra16Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
DCMode16(I16DC16 + dst, left, top);
VerticalPred16(I16VE16 + dst, top);
HorizontalPred16(I16HE16 + dst, left);
@ -1043,7 +1052,8 @@ static void Intra16Preds_MIPSdspR2(uint8_t* dst,

// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@ -1079,7 +1089,8 @@ static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
GET_SSE_INNER(C) \
GET_SSE_INNER(D)

static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1109,7 +1120,8 @@ static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1131,7 +1143,8 @@ static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1149,7 +1162,8 @@ static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
return count;
}

static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1273,7 +1287,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
"3: \n\t"

static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
const VP8Matrix* WEBP_RESTRICT const mtx) {
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
int sign, coeff, level;
int max_level = MAX_LEVEL;
@ -1314,7 +1328,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
}

static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -1360,7 +1374,8 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
|
@ -41,8 +41,9 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -69,16 +70,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_MSA(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -131,7 +134,8 @@ static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -168,7 +172,8 @@ static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -200,15 +205,17 @@ static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int sum1 = TTransform_MSA(a, w);
|
||||
const int sum2 = TTransform_MSA(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -259,7 +266,9 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top - 1);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -272,7 +281,9 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
SW4(out, out, out, out, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -284,7 +295,8 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
@ -293,7 +305,8 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(dc, dc, dc, dc, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A2 = { 0 };
|
||||
const uint64_t val_m = LD(top - 5);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
|
||||
@ -313,7 +326,8 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val3, val2, val1, val0, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -333,7 +347,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val0, val1, val2, val3, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -354,7 +369,8 @@ static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -375,7 +391,8 @@ static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -390,7 +407,8 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -411,7 +429,8 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16i8 zero = { 0 };
|
||||
const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
|
||||
const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
|
||||
@ -431,7 +450,8 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -451,7 +471,8 @@ static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const v16u8 out = LD_UB(top);
|
||||
STORE16x16(out, dst);
|
||||
@ -461,8 +482,8 @@ static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 16; j += 4) {
|
||||
@ -480,8 +501,9 @@ static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -519,8 +541,9 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC;
|
||||
v16u8 out;
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -548,8 +571,9 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -574,7 +598,8 @@ static void Intra16Preds_MSA(uint8_t* dst,
|
||||
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const uint64_t out = LD(top);
|
||||
STORE8x8(out, dst);
|
||||
@ -584,7 +609,8 @@ static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 8; j += 4) {
|
||||
@ -606,8 +632,9 @@ static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -646,8 +673,9 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint64_t out;
|
||||
v16u8 src = { 0 };
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -670,8 +698,9 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -712,7 +741,8 @@ static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -739,7 +769,8 @@ static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -758,7 +789,8 @@ static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -778,7 +810,8 @@ static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -801,7 +834,7 @@ static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -854,7 +887,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
|
@ -60,8 +60,8 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
const uint8_t* WEBP_RESTRICT const ref,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -120,8 +120,9 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -131,8 +132,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -247,8 +249,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
@ -294,8 +297,9 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
@ -364,8 +368,9 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -484,7 +489,8 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -659,8 +665,9 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -701,8 +708,9 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -715,9 +723,10 @@ static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -747,9 +756,9 @@ static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(
|
||||
const uint8_t* WEBP_RESTRICT const a, const uint8_t* WEBP_RESTRICT const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -775,7 +784,8 @@ static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
@ -784,7 +794,8 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -793,7 +804,8 @@ static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -806,7 +818,8 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -825,8 +838,9 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -860,7 +874,7 @@ static const uint8_t kShuffles[4][8] = {
|
||||
};
|
||||
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
@ -902,7 +916,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -913,6 +927,7 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
|
||||
#if WEBP_AARCH64
|
||||
|
||||
#if BPS == 32
|
||||
#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane) \
|
||||
do { \
|
||||
uint8x16_t r; \
|
||||
@ -930,7 +945,8 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
static void Intra4Preds_NEON(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
|
||||
// L K J I X A B C D E F G H
|
||||
// -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7
|
||||
@ -1023,6 +1039,7 @@ static void Intra4Preds_NEON(uint8_t* dst, const uint8_t* top) {
|
||||
vst1_u8(dst + I4HD4 + BPS * 2, vget_low_u8(result1));
|
||||
vst1_u8(dst + I4HD4 + BPS * 3, vget_high_u8(result1));
|
||||
}
|
||||
#endif // BPS == 32
|
||||
|
||||
static WEBP_INLINE void Fill_NEON(uint8_t* dst, const uint8_t value) {
|
||||
uint8x16_t a = vdupq_n_u8(value);
|
||||
@ -1162,8 +1179,9 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static void Intra16Preds_NEON(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode_NEON(I16DC16 + dst, left, top);
|
||||
VerticalPred16_NEON(I16VE16 + dst, top);
|
||||
HorizontalPred16_NEON(I16HE16 + dst, left);
|
||||
|
@ -26,8 +26,9 @@
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one inverse transform.
|
||||
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -177,8 +178,9 @@ static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does two inverse transforms.
|
||||
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -316,7 +318,9 @@ static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int do_two) {
|
||||
if (do_two) {
|
||||
ITransform_Two_SSE2(ref, in, dst);
|
||||
@ -373,7 +377,7 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -424,8 +428,9 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -468,8 +473,9 @@ static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -517,7 +523,8 @@ static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
|
||||
__m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -533,7 +540,8 @@ static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
@ -566,9 +574,10 @@ static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -640,7 +649,8 @@ static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -648,7 +658,8 @@ static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -656,8 +667,9 @@ static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv_SSE2(dst, top);
|
||||
@ -669,7 +681,8 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -678,7 +691,8 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -687,8 +701,9 @@ static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv_SSE2(dst, left);
|
||||
@ -700,8 +715,9 @@ static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -728,8 +744,10 @@ static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM_SSE2(dst, left, top, size);
|
||||
@ -749,8 +767,9 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
@ -758,7 +777,8 @@ static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
@ -766,7 +786,8 @@ static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -775,8 +796,9 @@ static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
@ -790,8 +812,9 @@ static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
@ -799,13 +822,15 @@ static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -814,8 +839,9 @@ static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16_SSE2(dst, left, top);
|
||||
@ -844,8 +870,9 @@ static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -861,8 +888,9 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -874,15 +902,17 @@ static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
// Down-Left
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -898,8 +928,9 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
// Vertical-Right
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -924,8 +955,9 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
// Vertical-Left
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -951,8 +983,9 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
// Down-right
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -968,7 +1001,8 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -983,7 +1017,8 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -1006,7 +1041,8 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -1028,7 +1064,8 @@ static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
@ -1044,8 +1081,9 @@ static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
@ -1064,8 +1102,9 @@ static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
@ -1092,7 +1131,8 @@ static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
@ -1114,18 +1154,21 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1152,7 +1195,8 @@ static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1189,7 +1233,7 @@ static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1227,8 +1271,9 @@ static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
|
||||
const uint8_t* WEBP_RESTRICT inB,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1328,14 +1373,16 @@ static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -1350,9 +1397,10 @@ static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const uint16_t* WEBP_RESTRICT const sharpen,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1463,17 +1511,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
@@ -23,9 +23,10 @@
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms.

static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
                                   const uint8_t* WEBP_RESTRICT pred,
                                   int start_block, int end_block,
                                   VP8Histogram* const histo) {
                                   VP8Histogram* WEBP_RESTRICT const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
@@ -168,14 +169,16 @@ static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
  return sum[0] + sum[1] + sum[2] + sum[3];
}

static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
                          const uint16_t* const w) {
static int Disto4x4_SSE41(const uint8_t* WEBP_RESTRICT const a,
                          const uint8_t* WEBP_RESTRICT const b,
                          const uint16_t* WEBP_RESTRICT const w) {
  const int diff_sum = TTransform_SSE41(a, b, w);
  return abs(diff_sum) >> 5;
}

static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
                            const uint16_t* const w) {
static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
                            const uint8_t* WEBP_RESTRICT const b,
                            const uint16_t* WEBP_RESTRICT const w) {
  int D = 0;
  int x, y;
  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
@@ -301,17 +304,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
#undef PSHUFB_CST

static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
                               const VP8Matrix* const mtx) {
                               const VP8Matrix* WEBP_RESTRICT const mtx) {
  return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
}

static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
                                  const VP8Matrix* const mtx) {
                                  const VP8Matrix* WEBP_RESTRICT const mtx) {
  return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
}

static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
                                 const VP8Matrix* const mtx) {
                                 const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
  const uint16_t* const sharpen = &mtx->sharpen_[0];
  nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
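Note: for readers unfamiliar with CollectHistogram, the distribution[MAX_COEFF_THRESH + 1] array above suggests the general shape of the computation: absolute transform coefficients are clipped to MAX_COEFF_THRESH and counted per bin. The scalar outline below is an assumption-heavy sketch (the real code first transforms ref/pred blocks and may scale the coefficients); all names and the clip value are illustrative.

#include <stdint.h>
#include <stdlib.h>

#define EXAMPLE_MAX_COEFF_THRESH 31  /* assumed clip value (bin count - 1) */

void CollectCoeffHistogram(const int16_t coeffs[16],
                           int distribution[EXAMPLE_MAX_COEFF_THRESH + 1]) {
  int k;
  for (k = 0; k < 16; ++k) {
    int v = abs(coeffs[k]);                                          /* magnitude */
    if (v > EXAMPLE_MAX_COEFF_THRESH) v = EXAMPLE_MAX_COEFF_THRESH;  /* clip */
    ++distribution[v];                                /* one bin per clipped value */
  }
}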
@@ -23,14 +23,16 @@
  do {                        \
    assert((in) != NULL);     \
    assert((out) != NULL);    \
    assert((in) != (out));    \
    assert(width > 0);        \
    assert(height > 0);       \
    assert(stride >= width);  \
  } while (0)

#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
                                      uint8_t* dst, int length) {
static WEBP_INLINE void PredictLine_C(const uint8_t* WEBP_RESTRICT src,
                                      const uint8_t* WEBP_RESTRICT pred,
                                      uint8_t* WEBP_RESTRICT dst, int length) {
  int i;
  for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
}
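Note: PredictLine_C above writes residuals, dst[i] = src[i] - pred[i]. The round trip below shows why that is lossless for the simplest case, a pure left-neighbour predictor; it is a sketch with hypothetical names, not the library's filter drivers, which also handle the first pixel of each row via the row above.

#include <stdint.h>

/* Forward: store each byte as the difference from its left neighbour. */
void ForwardLeftFilter(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  dst[0] = src[0];                              /* no left neighbour for column 0 */
  for (i = 1; i < width; ++i) {
    dst[i] = (uint8_t)(src[i] - src[i - 1]);    /* residual, wraps modulo 256 */
  }
}

/* Inverse: add the prediction back; exact because both sides wrap modulo 256. */
void InverseLeftFilter(const uint8_t* res, uint8_t* dst, int width) {
  int i;
  dst[0] = res[0];
  for (i = 1; i < width; ++i) {
    dst[i] = (uint8_t)(res[i] + dst[i - 1]);
  }
}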
@ -38,9 +40,9 @@ static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -66,9 +68,9 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -99,9 +101,9 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -136,18 +138,21 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
@ -26,8 +26,9 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -101,7 +102,8 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
@ -191,9 +193,9 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -210,9 +212,9 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -228,9 +230,9 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -247,8 +249,9 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -284,9 +287,9 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -303,8 +306,9 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,8 @@
|
||||
|
||||
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
v16u8 src0, pred0, dst0;
|
||||
assert(length >= 0);
|
||||
while (length >= 32) {
|
||||
@ -58,8 +59,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -68,8 +70,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter
|
||||
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -99,8 +102,8 @@ static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
|
||||
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
const uint8_t* ppred,
|
||||
uint8_t* poutput, int stride,
|
||||
int size) {
|
||||
uint8_t* WEBP_RESTRICT poutput,
|
||||
int stride, int size) {
|
||||
int w;
|
||||
const v16i8 zero = { 0 };
|
||||
while (size >= 16) {
|
||||
@ -131,8 +134,9 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -159,8 +163,9 @@ static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
|
@ -23,8 +23,9 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -44,7 +45,7 @@
|
||||
#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
|
||||
|
||||
static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
assert(length >= 0);
|
||||
for (i = 0; i + 16 <= length; i += 16) {
|
||||
@ -57,16 +58,17 @@ static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
PredictLine_NEON(src, src - 1, dst, length);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -86,17 +88,18 @@ static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -115,8 +118,9 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -130,7 +134,8 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= length; i += 8) {
|
||||
const uint8x8_t A = vld1_u8(&row[i - 1]);
|
||||
@ -146,9 +151,9 @@ static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -167,8 +172,9 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -27,13 +27,15 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
} while (0)
|
||||
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -51,7 +53,8 @@ static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -71,9 +74,9 @@ static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -96,9 +99,9 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -127,7 +130,8 @@ static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -151,9 +155,9 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* out) {
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -176,18 +180,21 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
}
|
||||
|
||||
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int x;
|
||||
(void)upper;
|
||||
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
|
||||
}
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
(void)upper;
|
||||
@ -441,8 +441,8 @@ static int is_big_endian(void) {
|
||||
return (tmp.b[0] != 1);
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
int swap_on_big_endian) {
|
||||
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
|
||||
if (is_big_endian() == swap_on_big_endian) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "src/webp/types.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -60,7 +61,7 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
// These Add/Sub functions expect upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
const uint32_t* upper, int num_pixels,
|
||||
uint32_t* out);
|
||||
uint32_t* WEBP_RESTRICT out);
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
|
||||
|
||||
@ -91,8 +92,8 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
|
||||
const uint32_t* const in, uint32_t* const out);
|
||||
|
||||
// Color space conversion.
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst);
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
@ -127,13 +128,16 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
@ -145,29 +149,32 @@ void VP8LDspInit(void);
|
||||
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* dst, int num_pixels);
|
||||
typedef void (*VP8LTransformColorFunc)(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
|
||||
int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]);
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
|
||||
typedef void (*VP8LCollectColorRedTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels);
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]);
|
||||
@ -179,14 +186,17 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
int length);
|
||||
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
||||
const uint32_t Y[256]);
|
||||
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
|
||||
|
||||
extern VP8LCostFunc VP8LExtraCost;
|
||||
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
typedef struct { // small struct to hold counters
|
||||
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
||||
@ -208,26 +218,30 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
|
||||
// codec specific heuristics.
|
||||
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
|
||||
// Get the entropy for the distribution 'X'.
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats);
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy);
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy);
|
||||
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
|
||||
uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorFunc VP8LAddVector;
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
@ -237,11 +251,12 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
|
||||
// Returns the first index where array1 and array2 are different.
|
||||
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
|
||||
int xbits, uint32_t* dst);
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst);
|
||||
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst);
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
|
||||
|
||||
// Must be called before calling any of the above methods.
|
||||
void VP8LEncDspInit(void);
|
||||
|
@@ -194,15 +194,15 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {

// The predictor is added to the output pixel (which
// is therefore considered as a residual) to get the final prediction.
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD)               \
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper,   \
                          int num_pixels, uint32_t* out) {             \
  int x;                                                               \
  assert(upper != NULL);                                               \
  for (x = 0; x < num_pixels; ++x) {                                   \
    const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);         \
    out[x] = VP8LAddPixels(in[x], pred);                               \
  }                                                                    \
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD)               \
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper,   \
                          int num_pixels, uint32_t* WEBP_RESTRICT out) { \
  int x;                                                               \
  assert(upper != NULL);                                               \
  for (x = 0; x < num_pixels; ++x) {                                   \
    const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);         \
    out[x] = VP8LAddPixels(in[x], pred);                               \
  }                                                                    \
}

#ifdef __cplusplus
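Note: expanded by hand, GENERATE_PREDICTOR_ADD produces one inverse-prediction helper per predictor. The sketch below shows such an expansion for a hypothetical "top pixel" predictor; AddPixels stands in for VP8LAddPixels (assumed to be per-channel ARGB addition modulo 256), and the function names are illustrative only.

#include <assert.h>
#include <stdint.h>

/* Stand-in for VP8LAddPixels: per-channel ARGB addition modulo 256 (assumed). */
static uint32_t AddPixels(uint32_t a, uint32_t b) {
  const uint32_t alpha_and_green = (a & 0xff00ff00u) + (b & 0xff00ff00u);
  const uint32_t red_and_blue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
  return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
}

/* Hypothetical predictor: the pixel directly above. */
static uint32_t PredictorTop(const uint32_t* const left, const uint32_t* const top) {
  (void)left;
  return top[0];
}

/* Roughly what GENERATE_PREDICTOR_ADD(PredictorTop, PredictorAddTop) expands
 * to after this change (WEBP_RESTRICT elided here). */
void PredictorAddTop(const uint32_t* in, const uint32_t* upper,
                     int num_pixels, uint32_t* out) {
  int x;
  assert(upper != NULL);
  for (x = 0; x < num_pixels; ++x) {
    const uint32_t pred = PredictorTop(&out[x - 1], upper + x);
    out[x] = AddPixels(in[x], pred);
  }
}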
@ -336,6 +336,21 @@ static uint64_t CombinedShannonEntropy_C(const uint32_t X[256],
|
||||
return retval;
|
||||
}
|
||||
|
||||
static uint64_t ShannonEntropy_C(const uint32_t* X, int n) {
|
||||
int i;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0;
|
||||
for (i = 0; i < n; ++i) {
|
||||
const int x = X[i];
|
||||
if (x != 0) {
|
||||
sumX += x;
|
||||
retval += VP8LFastSLog2(x);
|
||||
}
|
||||
}
|
||||
retval = VP8LFastSLog2(sumX) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
entropy->entropy = 0;
|
||||
entropy->sum = 0;
|
||||
@ -344,8 +359,8 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
|
||||
}
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy) {
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
|
||||
int i;
|
||||
|
||||
VP8LBitEntropyInit(entropy);
|
||||
@ -365,8 +380,10 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
}
|
||||
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
const int streak = i - *i_prev;
|
||||
|
||||
// Gather info for the bit entropy.
|
||||
@ -388,9 +405,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_C(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -409,11 +427,10 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_C(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -453,8 +470,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
|
||||
return (int8_t)(v & 0xff);
|
||||
}
|
||||
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
@ -490,7 +507,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
@ -502,7 +520,8 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]) {
|
||||
@ -529,8 +548,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
if (xbits > 0) {
|
||||
const int bit_depth = 1 << (3 - xbits);
|
||||
@ -561,7 +580,8 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
int length) {
|
||||
int i;
|
||||
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
|
||||
@ -576,13 +596,15 @@ static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] += a[i];
|
||||
}
|
||||
@ -611,8 +633,9 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b, VP8LHistogram* const out) {
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out) {
|
||||
int i;
|
||||
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
@ -642,14 +665,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
// Image transforms.
|
||||
|
||||
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
|
||||
(void)upper;
|
||||
}
|
||||
|
||||
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
|
||||
(void)upper;
|
||||
@ -660,7 +683,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
|
||||
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
||||
const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
@ -698,6 +722,7 @@ VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||
VP8LCostFunc VP8LExtraCost;
|
||||
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
@ -737,6 +762,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
VP8LExtraCost = ExtraCost_C;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_C;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
|
||||
VP8LShannonEntropy = ShannonEntropy_C;
|
||||
|
||||
VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
|
||||
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
|
||||
@ -826,6 +852,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
assert(VP8LExtraCost != NULL);
|
||||
assert(VP8LExtraCostCombined != NULL);
|
||||
assert(VP8LCombinedShannonEntropy != NULL);
|
||||
assert(VP8LShannonEntropy != NULL);
|
||||
assert(VP8LGetEntropyUnrefined != NULL);
|
||||
assert(VP8LGetCombinedEntropyUnrefined != NULL);
|
||||
assert(VP8LAddVector != NULL);
|
||||
|
@ -149,8 +149,9 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// pY += 2;
|
||||
// }
|
||||
// return cost;
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
|
||||
const uint32_t* WEBP_RESTRICT const Y,
|
||||
int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
@ -215,8 +216,10 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
|
||||
// Returns the various RLE counts
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int* const pstreaks = &stats->streaks[0][0];
|
||||
int* const pcnts = &stats->counts[0];
|
||||
int temp0, temp1, temp2, temp3;
|
||||
@ -241,9 +244,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -262,11 +266,10 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -344,8 +347,9 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
ASM_END_COMMON_0 \
|
||||
ASM_END_COMMON_1
|
||||
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
const uint32_t* WEBP_RESTRICT pb,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
@ -356,7 +360,8 @@ static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
|
@@ -78,8 +78,9 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
  return (uint32_t)((int)(color_pred) * color) >> 5;
}

static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
                                     uint32_t* data, int num_pixels) {
static void TransformColor_MIPSdspR2(
    const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data,
    int num_pixels) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red, new_red1;
  const uint32_t G_to_R = m->green_to_red_;
@@ -172,7 +173,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
}

static void CollectColorBlueTransforms_MIPSdspR2(
    const uint32_t* argb, int stride, int tile_width, int tile_height,
    const uint32_t* WEBP_RESTRICT argb, int stride,
    int tile_width, int tile_height,
    int green_to_blue, int red_to_blue, uint32_t histo[]) {
  const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
  const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
@@ -221,11 +223,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
  return (new_red & 0xff);
}

static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
                                                int stride, int tile_width,
                                                int tile_height,
                                                int green_to_red,
                                                uint32_t histo[]) {
static void CollectColorRedTransforms_MIPSdspR2(
    const uint32_t* WEBP_RESTRICT argb, int stride,
    int tile_width, int tile_height, int green_to_red, uint32_t histo[]) {
  const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
  while (tile_height-- > 0) {
    int x;
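Note: the CollectColorRed/BlueTransforms variants above score a candidate colour-transform multiplier by histogramming the transformed channel over a tile. A scalar sketch of the red case, reusing the ColorTransformDelta shape shown at the top of this hunk ((color_pred * color) >> 5 on sign-extended values), could look as follows; the exact masking details are assumptions and the names are hypothetical.

#include <stdint.h>

/* (color_pred * color) >> 5, both operands treated as signed bytes (sketch). */
static int ExampleColorTransformDelta(int8_t color_pred, int8_t color) {
  return ((int)color_pred * (int)color) >> 5;
}

/* Histogram of the red channel after removing its green-correlated part. */
void CollectRedTransformsSketch(const uint32_t* argb, int stride,
                                int tile_width, int tile_height,
                                int green_to_red, uint32_t histo[256]) {
  while (tile_height-- > 0) {
    int x;
    for (x = 0; x < tile_width; ++x) {
      const uint32_t p = argb[x];
      const int8_t green = (int8_t)(p >> 8);
      const int new_red = (int)((p >> 16) & 0xff) -
                          ExampleColorTransformDelta((int8_t)green_to_red, green);
      ++histo[new_red & 0xff];  /* wrap to 8 bits before binning */
    }
    argb += stride;
  }
}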
@@ -48,8 +48,8 @@
    dst = VSHF_UB(src, t0, mask1);  \
  } while (0)

static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
                               int num_pixels) {
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
                               uint32_t* WEBP_RESTRICT data, int num_pixels) {
  v16u8 src0, dst0;
  const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
                                         (m->green_to_red_ << 16));
@@ -72,8 +72,9 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
//------------------------------------------------------------------------------
// Color Transform

static void TransformColor_NEON(const VP8LMultipliers* const m,
                                uint32_t* argb_data, int num_pixels) {
static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
                                uint32_t* WEBP_RESTRICT argb_data,
                                int num_pixels) {
  // sign-extended multiplying constants, pre-shifted by 6.
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
  const int16_t rb[8] = {
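Note: the TransformColor_* ports in this diff all implement the same scalar rule (cf. the VP8LTransformColor_C signature changed earlier): a green-derived delta is subtracted from red, and green- and red-derived deltas from blue, with deltas of the form (multiplier * channel) >> 5 on sign-extended bytes. The NEON comment above merely pre-scales its constants so 16-bit multiplies can be used. A scalar sketch, with masking and channel order hedged as assumptions:

#include <stdint.h>

static int Delta(int8_t mult, int8_t channel) {  /* (mult * channel) >> 5 */
  return ((int)mult * (int)channel) >> 5;
}

/* Forward color transform over n ARGB pixels (sketch; details assumed). */
void TransformColorSketch(int8_t green_to_red, int8_t green_to_blue,
                          int8_t red_to_blue, uint32_t* argb, int n) {
  int i;
  for (i = 0; i < n; ++i) {
    const uint32_t p = argb[i];
    const int8_t green = (int8_t)(p >> 8);
    const int8_t red = (int8_t)(p >> 16);
    int new_red = (int)((p >> 16) & 0xff) - Delta(green_to_red, green);
    int new_blue = (int)(p & 0xff) - Delta(green_to_blue, green);
    new_blue -= Delta(red_to_blue, red);  /* uses the original red value */
    argb[i] = (p & 0xff00ff00u) |
              ((uint32_t)(new_red & 0xff) << 16) | (uint32_t)(new_blue & 0xff);
  }
}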
@ -49,8 +49,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||
CST_5b(m->green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||
@ -79,7 +80,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 8
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]) {
|
||||
@ -126,7 +128,8 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
|
||||
@ -172,62 +175,102 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] += a[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&out[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
#undef LINE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
@ -333,8 +376,9 @@ static int VectorMismatch_SSE2(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
assert(xbits >= 0);
|
||||
assert(xbits <= 3);
|
||||
@ -423,7 +467,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,

// Predictor0: ARGB_BLACK.
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -440,7 +484,8 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorSub##X##_SSE2(const uint32_t* const in, \
const uint32_t* const upper, \
int num_pixels, uint32_t* const out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT const out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
@ -462,7 +507,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL

// Predictor5: avg2(avg2(L, TR), T)
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -482,7 +527,8 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,

#define GENERATE_PREDICTOR_2(X, A, B) \
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \
@ -506,7 +552,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)

// Predictor10: avg(avg(L,TL), avg(T, TR)).
static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -541,7 +587,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
}

static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -567,7 +613,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,

// Predictor12: ClampedSubSubtractFull.
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -596,7 +642,7 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,

// Predictors13: ClampedAddSubtractHalf
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 2 <= num_pixels; i += 2) {
@ -44,8 +44,9 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
return HorizontalSum_SSE41(cost);
}

static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
const uint32_t* const b, int length) {
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
const uint32_t* WEBP_RESTRICT const b,
int length) {
int i;
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
@ -95,7 +96,8 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
#define MK_CST_16(HI, LO) \
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))

static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]) {
@ -141,7 +143,8 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
}
}

static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red,
uint32_t histo[]) {
@ -26,8 +26,8 @@
#if !defined(WORK_AROUND_GCC)
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
// gcc-4.8.x at least.
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst);  // left-overs
}

static void ConvertBGRAToBGR_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* src,
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst);  // left-overs
}

static void ConvertBGRAToRGB_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src,

static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };

static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~1);
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
for (; src < end; src += 2) {
@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
{ 21, 22, 24, 25, 26, 28, 29, 30 }
};

static void ConvertBGRAToBGR_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~7);
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
{ 21, 20, 26, 25, 24, 30, 29, 28 }
};

static void ConvertBGRAToRGB_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~7);
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left,

// Predictor0: ARGB_BLACK.
static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK));
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,

// Predictor1: left.
static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const uint8x16_t zero = LOADQ_U32_AS_U8(0);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorAdd##X##_NEON(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* out) { \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1])
} while (0)

static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,

// Predictor6: average(left, TL)
static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,

// Predictor7: average(left, T)
static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_2(X, IN) \
static void PredictorAdd##X##_NEON(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* out) { \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
} while (0)

static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)

static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)

static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1]));
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)

static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -186,7 +186,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -202,7 +202,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i prev = _mm_set1_epi32((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -230,7 +230,8 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
// per 8 bit channel.
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -259,7 +260,8 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2)
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \
|
||||
@ -297,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -344,7 +346,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -395,7 +397,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -490,8 +492,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
||||
@ -526,8 +528,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
@ -554,8 +556,9 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -590,8 +593,9 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
@ -631,8 +635,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
|
||||
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
|
@ -77,8 +77,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
|
||||
@ -95,8 +95,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
|
||||
|
@ -26,8 +26,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -59,8 +59,8 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -158,7 +158,8 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Main entry calls
|
||||
|
||||
void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
void WebPRescalerImportRow(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
assert(!WebPRescalerInputDone(wrk));
|
||||
if (!wrk->x_expand) {
|
||||
WebPRescalerImportRowShrink(wrk, src);
|
||||
|
@ -21,8 +21,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int fx_scale = wrk->fx_scale;
|
||||
@ -81,8 +81,8 @@ static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
|
@ -114,9 +114,9 @@
|
||||
dst = __msa_copy_s_w((v4i32)t0, 0); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
@ -171,9 +171,10 @@ static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_1(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
|
||||
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
|
||||
const v4i32 B1 = __msa_fill_w(B);
|
||||
@ -262,10 +263,10 @@ static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
|
||||
}
|
||||
|
||||
#if 0 // disabled for now. TODO(skal): make match the C-code
|
||||
static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
const uint32_t yscale,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length, const uint32_t yscale,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 y_scale = (v4u32)__msa_fill_w(yscale);
|
||||
const v4u32 fxyscale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shiftval = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
@ -348,9 +349,9 @@ static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_1(
|
||||
uint32_t* WEBP_RESTRICT irow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
|
@ -45,8 +45,8 @@
|
||||
#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
|
||||
#endif
|
||||
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* const frow,
|
||||
const rescaler_t* const irow,
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* WEBP_RESTRICT const frow,
|
||||
const rescaler_t* WEBP_RESTRICT const irow,
|
||||
uint32_t A, uint32_t B) {
|
||||
LOAD_32x4(frow, A0);
|
||||
LOAD_32x4(irow, B0);
|
||||
|
@ -43,8 +43,8 @@ static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
*out = _mm_unpacklo_epi8(A, zero);
|
||||
}
|
||||
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
rescaler_t* frow = wrk->frow;
|
||||
const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
@ -109,8 +109,8 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
assert(accum == 0);
|
||||
}
|
||||
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_sub = wrk->x_sub;
|
||||
int accum = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -168,12 +168,10 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
// Row export
|
||||
|
||||
// load *src as epi64, multiply by mult and store result in [out0 ... out3]
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
__m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(
|
||||
const rescaler_t* WEBP_RESTRICT const src, const __m128i* const mult,
|
||||
__m128i* const out0, __m128i* const out1, __m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
|
||||
const __m128i A2 = _mm_srli_epi64(A0, 32);
|
||||
|
@ -35,10 +35,14 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -136,10 +140,14 @@ static void EmptyUpsampleFunc(const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
|
||||
#if !defined(FANCY_UPSAMPLING)
|
||||
#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* bot_u, const uint8_t* bot_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT bot_u, \
|
||||
const uint8_t* WEBP_RESTRICT bot_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
const int half_len = len >> 1; \
|
||||
int x; \
|
||||
assert(top_dst != NULL); \
|
||||
@ -178,10 +186,14 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * (XSTEP)]); \
|
||||
}
|
||||
|
@ -143,10 +143,14 @@ static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -241,8 +245,10 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
|
||||
}
|
||||
|
@ -320,8 +320,10 @@ static void YuvToRgba(uint8_t y, uint8_t u, uint8_t v, uint8_t* const rgba) {
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToRgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -347,8 +349,10 @@ static void YuvToRgbLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToBgrLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -375,8 +379,10 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgbaLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -403,8 +409,10 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToBgraLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -432,8 +440,10 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToArgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -460,8 +470,10 @@ static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgba4444Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B, RG, BA, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -496,8 +508,10 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgb565Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B, RG, GB, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -564,11 +578,14 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
|
||||
} while (0)
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) \
|
||||
{ \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
int size = (len - 1) >> 1; \
|
||||
uint8_t temp_u[64]; \
|
||||
uint8_t temp_v[64]; \
|
||||
|
@ -58,8 +58,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
|
||||
uint8_t* out) {
|
||||
static void Upsample16Pixels_NEON(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_16PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -190,10 +191,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
|
||||
}
|
||||
|
||||
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int block; \
|
||||
/* 16 byte aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[2 * 32 + 15]; \
|
||||
|
@ -88,8 +88,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
static void Upsample32Pixels_SSE2(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -114,10 +115,14 @@ static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
} while (0)
|
||||
|
||||
#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
@ -215,10 +220,14 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
@ -90,8 +90,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
static void Upsample32Pixels_SSE41(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -116,10 +117,14 @@ static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
} while (0)
|
||||
|
||||
#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
@ -202,10 +207,14 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
@ -20,9 +20,10 @@
|
||||
// Plain-C version
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
const uint8_t* const end = dst + (len & ~1) * (XSTEP); \
|
||||
while (dst != end) { \
|
||||
FUNC(y[0], u[0], v[0], dst); \
|
||||
@ -49,9 +50,10 @@ ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2)
|
||||
#undef ROW_FUNC
|
||||
|
||||
// Main call for processing a plane with a WebPSamplerRowFunc function:
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func) {
|
||||
int j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
@ -117,7 +119,8 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
|
||||
//-----------------------------------------------------------------------------
|
||||
// ARGB -> YUV converters
|
||||
|
||||
static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
const uint32_t p = argb[i];
|
||||
@ -126,7 +129,8 @@ static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
// No rounding. Last pixel is dealt with separately.
|
||||
const int uv_width = src_width >> 1;
|
||||
@ -169,22 +173,25 @@ void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_C(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, rgb += 3) {
|
||||
y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_C(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, bgr += 3) {
|
||||
y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; i += 1, rgb += 4) {
|
||||
const int r = rgb[0], g = rgb[1], b = rgb[2];
|
||||
@ -195,13 +202,18 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
|
@ -11,15 +11,15 @@
|
||||
//
|
||||
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
|
||||
// More information at: https://en.wikipedia.org/wiki/YCbCr
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
// Y = 0.2568 * R + 0.5041 * G + 0.0979 * B + 16
|
||||
// U = -0.1482 * R - 0.2910 * G + 0.4392 * B + 128
|
||||
// V = 0.4392 * R - 0.3678 * G - 0.0714 * B + 128
|
||||
// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX).
|
||||
//
|
||||
// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
|
||||
// R = 1.164 * (Y-16) + 1.596 * (V-128)
|
||||
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
|
||||
// B = 1.164 * (Y-16) + 2.018 * (U-128)
|
||||
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.392 * (U-128)
|
||||
// B = 1.164 * (Y-16) + 2.017 * (U-128)
|
||||
// where Y is in the [16,235] range, and U/V in the [16,240] range.
|
||||
//
|
||||
// The fixed-point implementation used here is:
|
||||
@ -149,20 +149,34 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
@ -172,10 +186,14 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
|
@ -22,9 +22,10 @@
|
||||
// simple point-sampling
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i, r, g, b; \
|
||||
int temp0, temp1, temp2, temp3, temp4; \
|
||||
for (i = 0; i < (len >> 1); i++) { \
|
||||
|
@ -69,9 +69,10 @@
|
||||
: "memory", "hi", "lo" \
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
|
||||
const int t_con_1 = 26149; \
|
||||
|
@ -46,7 +46,8 @@ static uint8x8_t ConvertRGBToY_NEON(const uint8x8_t R,
|
||||
return vqmovn_u16(Y2);
|
||||
}
|
||||
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
|
||||
const uint8x8x3_t RGB = vld3_u8(rgb);
|
||||
@ -58,7 +59,8 @@ static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
|
||||
const uint8x8x3_t BGR = vld3_u8(bgr);
|
||||
@ -70,7 +72,8 @@ static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8) {
|
||||
const uint8x8x4_t RGB = vld4_u8((const uint8_t*)&argb[i]);
|
||||
@ -114,8 +117,9 @@ static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
|
||||
MULTIPLY_16b(28800, -24116, -4684, 128 << SHIFT, V_DST); \
|
||||
} while (0)
|
||||
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 4 * 8) {
|
||||
const uint16x8x4_t RGB = vld4q_u16((const uint16_t*)rgb);
|
||||
@ -131,7 +135,9 @@ static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
int i;
|
||||
for (i = 0; i + 16 <= src_width; i += 16, u += 8, v += 8) {
|
||||
|
@@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
}

// Convert 32 samples of YUV444 to R/G/B
static void YUV444ToRGB_SSE2(const uint8_t* const y,
const uint8_t* const u,
const uint8_t* const v,
static void YUV444ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
const uint8_t* WEBP_RESTRICT const u,
const uint8_t* WEBP_RESTRICT const v,
__m128i* const R, __m128i* const G,
__m128i* const B) {
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_HI_16_SSE2(u),
@@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE2(const uint8_t* const y,
}

// Convert 32 samples of YUV420 to R/G/B
static void YUV420ToRGB_SSE2(const uint8_t* const y,
const uint8_t* const u,
const uint8_t* const v,
static void YUV420ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
const uint8_t* WEBP_RESTRICT const u,
const uint8_t* WEBP_RESTRICT const v,
__m128i* const R, __m128i* const G,
__m128i* const B) {
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_UV_HI_8_SSE2(u),
@@ -108,7 +108,7 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
const __m128i* const G,
const __m128i* const B,
const __m128i* const A,
uint8_t* const dst) {
uint8_t* WEBP_RESTRICT const dst) {
const __m128i rb = _mm_packus_epi16(*R, *B);
const __m128i ga = _mm_packus_epi16(*G, *A);
const __m128i rg = _mm_unpacklo_epi8(rb, ga);
@@ -120,11 +120,9 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
}

// Pack R/G/B/A results into 16b output.
static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
const __m128i* const G,
const __m128i* const B,
const __m128i* const A,
uint8_t* const dst) {
static WEBP_INLINE void PackAndStore4444_SSE2(
const __m128i* const R, const __m128i* const G, const __m128i* const B,
const __m128i* const A, uint8_t* WEBP_RESTRICT const dst) {
#if (WEBP_SWAP_16BIT_CSP == 0)
const __m128i rg0 = _mm_packus_epi16(*R, *G);
const __m128i ba0 = _mm_packus_epi16(*B, *A);
@@ -145,7 +143,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
const __m128i* const G,
const __m128i* const B,
uint8_t* const dst) {
uint8_t* WEBP_RESTRICT const dst) {
const __m128i r0 = _mm_packus_epi16(*R, *R);
const __m128i g0 = _mm_packus_epi16(*G, *G);
const __m128i b0 = _mm_packus_epi16(*B, *B);
@@ -170,7 +168,7 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
__m128i* const in2, __m128i* const in3,
__m128i* const in4, __m128i* const in5,
uint8_t* const rgb) {
uint8_t* WEBP_RESTRICT const rgb) {
// The input is 6 registers of sixteen 8b but for the sake of explanation,
// let's take 6 registers of four 8b values.
// To pack, we will keep taking one every two 8b integer and move it
@@ -193,8 +191,10 @@ static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
}

void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@@ -204,8 +204,10 @@ void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}

void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@@ -215,8 +217,10 @@ void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}

void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@@ -226,8 +230,10 @@ void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}

void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst) {
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 16) {
@@ -237,8 +243,10 @@ void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
}
}

void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
int n;
for (n = 0; n < 32; n += 8, dst += 16) {
__m128i R, G, B;
@@ -247,8 +255,10 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}

void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;

@@ -269,8 +279,10 @@ void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
}

void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;

@@ -294,9 +306,10 @@ void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
//-----------------------------------------------------------------------------
// Arbitrary-length row conversion functions

static void YuvToRgbaRow_SSE2(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToRgbaRow_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
@@ -316,9 +329,10 @@ static void YuvToRgbaRow_SSE2(const uint8_t* y,
}
}

static void YuvToBgraRow_SSE2(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToBgraRow_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
@@ -338,9 +352,10 @@ static void YuvToBgraRow_SSE2(const uint8_t* y,
}
}

static void YuvToArgbRow_SSE2(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToArgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
@@ -360,9 +375,10 @@ static void YuvToArgbRow_SSE2(const uint8_t* y,
}
}

static void YuvToRgbRow_SSE2(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToRgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
int n;
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
@@ -397,9 +413,10 @@ static void YuvToRgbRow_SSE2(const uint8_t* y,
}
}

static void YuvToBgrRow_SSE2(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToBgrRow_SSE2(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
int n;
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
@@ -471,7 +488,7 @@ static WEBP_INLINE void RGB24PackedToPlanarHelper_SSE2(
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
// Similar to PlanarTo24bHelper(), but in reverse order.
static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
__m128i tmp[6];
tmp[0] = _mm_loadu_si128((const __m128i*)(rgb + 0));
tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16));
@@ -488,8 +505,8 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
}

// Convert 8 packed ARGB to r[], g[], b[]
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb,
__m128i* const rgb /*in[6]*/) {
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
const __m128i zero = _mm_setzero_si128();
__m128i a0 = LOAD_16(argb + 0);
__m128i a1 = LOAD_16(argb + 4);
@@ -562,7 +579,8 @@ static WEBP_INLINE void ConvertRGBToUV_SSE2(const __m128i* const R,
#undef MK_CST_16
#undef TRANSFORM

static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
static void ConvertRGB24ToY_SSE2(const uint8_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~31;
int i;
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
@@ -596,7 +614,8 @@ static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
}
}

static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
static void ConvertBGR24ToY_SSE2(const uint8_t* WEBP_RESTRICT bgr,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~31;
int i;
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
@@ -630,7 +649,8 @@ static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
}
}

static void ConvertARGBToY_SSE2(const uint32_t* argb, uint8_t* y, int width) {
static void ConvertARGBToY_SSE2(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~15;
int i;
for (i = 0; i < max_width; i += 16) {
@@ -658,8 +678,9 @@ static void HorizontalAddPack_SSE2(const __m128i* const A,
*out = _mm_packs_epi32(C, D);
}

static void ConvertARGBToUV_SSE2(const uint32_t* argb,
uint8_t* u, uint8_t* v,
static void ConvertARGBToUV_SSE2(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store) {
const int max_width = src_width & ~31;
int i;
@@ -695,7 +716,7 @@ static void ConvertARGBToUV_SSE2(const uint32_t* argb,

// Convert 16 packed ARGB 16b-values to r[], g[], b[]
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
const uint16_t* const rgbx,
const uint16_t* WEBP_RESTRICT const rgbx,
__m128i* const r, __m128i* const g, __m128i* const b) {
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
@@ -715,8 +736,9 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
*b = _mm_unpacklo_epi64(B1, B3);
}

static void ConvertRGBA32ToUV_SSE2(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width) {
static void ConvertRGBA32ToUV_SSE2(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width) {
const int max_width = width & ~15;
const uint16_t* const last_rgb = rgb + 4 * max_width;
while (rgb < last_rgb) {

@@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
}

// Convert 32 samples of YUV444 to R/G/B
static void YUV444ToRGB_SSE41(const uint8_t* const y,
const uint8_t* const u,
const uint8_t* const v,
static void YUV444ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
const uint8_t* WEBP_RESTRICT const u,
const uint8_t* WEBP_RESTRICT const v,
__m128i* const R, __m128i* const G,
__m128i* const B) {
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_HI_16_SSE41(u),
@@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE41(const uint8_t* const y,
}

// Convert 32 samples of YUV420 to R/G/B
static void YUV420ToRGB_SSE41(const uint8_t* const y,
const uint8_t* const u,
const uint8_t* const v,
static void YUV420ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
const uint8_t* WEBP_RESTRICT const u,
const uint8_t* WEBP_RESTRICT const v,
__m128i* const R, __m128i* const G,
__m128i* const B) {
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_UV_HI_8_SSE41(u),
@@ -109,7 +109,7 @@ static void YUV420ToRGB_SSE41(const uint8_t* const y,
static WEBP_INLINE void PlanarTo24b_SSE41(
__m128i* const in0, __m128i* const in1, __m128i* const in2,
__m128i* const in3, __m128i* const in4, __m128i* const in5,
uint8_t* const rgb) {
uint8_t* WEBP_RESTRICT const rgb) {
// The input is 6 registers of sixteen 8b but for the sake of explanation,
// let's take 6 registers of four 8b values.
// To pack, we will keep taking one every two 8b integer and move it
@@ -132,8 +132,10 @@ static WEBP_INLINE void PlanarTo24b_SSE41(
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
}

void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;

@@ -154,8 +156,10 @@ void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
}

void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;

@@ -179,9 +183,10 @@ void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
//-----------------------------------------------------------------------------
// Arbitrary-length row conversion functions

static void YuvToRgbRow_SSE41(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToRgbRow_SSE41(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
int n;
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
@@ -216,9 +221,10 @@ static void YuvToRgbRow_SSE41(const uint8_t* y,
}
}

static void YuvToBgrRow_SSE41(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len) {
static void YuvToBgrRow_SSE41(const uint8_t* WEBP_RESTRICT y,
const uint8_t* WEBP_RESTRICT u,
const uint8_t* WEBP_RESTRICT v,
uint8_t* WEBP_RESTRICT dst, int len) {
int n;
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
@@ -290,7 +296,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE41(void) {
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
// Similar to PlanarTo24bHelper(), but in reverse order.
static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)(rgb + 0));
const __m128i A1 = _mm_loadu_si128((const __m128i*)(rgb + 16));
const __m128i A2 = _mm_loadu_si128((const __m128i*)(rgb + 32));
@@ -334,7 +340,7 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE41(

// Convert 8 packed ARGB to r[], g[], b[]
static WEBP_INLINE void RGB32PackedToPlanar_SSE41(
const uint32_t* const argb, __m128i* const rgb /*in[6]*/) {
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
const __m128i zero = _mm_setzero_si128();
__m128i a0 = LOAD_16(argb + 0);
__m128i a1 = LOAD_16(argb + 4);
@@ -407,7 +413,8 @@ static WEBP_INLINE void ConvertRGBToUV_SSE41(const __m128i* const R,
#undef MK_CST_16
#undef TRANSFORM

static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
static void ConvertRGB24ToY_SSE41(const uint8_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~31;
int i;
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
@@ -441,7 +448,8 @@ static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
}
}

static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
static void ConvertBGR24ToY_SSE41(const uint8_t* WEBP_RESTRICT bgr,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~31;
int i;
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
@@ -475,7 +483,8 @@ static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
}
}

static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) {
static void ConvertARGBToY_SSE41(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT y, int width) {
const int max_width = width & ~15;
int i;
for (i = 0; i < max_width; i += 16) {
@@ -503,8 +512,9 @@ static void HorizontalAddPack_SSE41(const __m128i* const A,
*out = _mm_packs_epi32(C, D);
}

static void ConvertARGBToUV_SSE41(const uint32_t* argb,
uint8_t* u, uint8_t* v,
static void ConvertARGBToUV_SSE41(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store) {
const int max_width = src_width & ~31;
int i;
@@ -540,7 +550,7 @@ static void ConvertARGBToUV_SSE41(const uint32_t* argb,

// Convert 16 packed ARGB 16b-values to r[], g[], b[]
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
const uint16_t* const rgbx,
const uint16_t* WEBP_RESTRICT const rgbx,
__m128i* const r, __m128i* const g, __m128i* const b) {
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
@@ -570,8 +580,9 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
*b = _mm_unpackhi_epi64(B1, B3);
}

static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width) {
static void ConvertRGBA32ToUV_SSE41(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width) {
const int max_width = width & ~15;
const uint16_t* const last_rgb = rgb + 4 * max_width;
while (rgb < last_rgb) {

@@ -287,7 +287,7 @@ static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) {
uint64_t retval = InitialHuffmanCost();
// Second coefficient: Many zeros in the histogram are covered efficiently
// by a run-length encode. Originally 2/8.
uint64_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1];
uint32_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1];
// Second coefficient: Constant values are encoded less efficiently, but still
// RLE'ed. Originally 6/8.
retval_extra += stats->counts[1] * 2640 + 720 * stats->streaks[1][1];
@@ -296,7 +296,7 @@ static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) {
retval_extra += 1840 * stats->streaks[0][0];
// Originally 26/8.
retval_extra += 3360 * stats->streaks[1][0];
return retval + (retval_extra << (LOG_2_PRECISION_BITS - 10));
return retval + ((uint64_t)retval_extra << (LOG_2_PRECISION_BITS - 10));
}

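A note on the arithmetic in this hunk: the coefficients 1600, 240, 2640, 720, 1840 and 3360 appear to be the original fractional bit costs pre-scaled by 2^10, so narrowing retval_extra to uint32_t is only safe because the final shift is now performed after widening back to uint64_t. The shift by (LOG_2_PRECISION_BITS - 10) converts the 1/1024 scale into the fixed-point cost scale used elsewhere. A hedged sketch of that rescaling, with LOG2_PRECISION_ASSUMED standing in for the library's LOG_2_PRECISION_BITS (the value 23 is an assumption for illustration only):

#include <stdint.h>
#define LOG2_PRECISION_ASSUMED 23 /* assumed stand-in, not the real constant */
static uint64_t RescaleCostSketch(uint32_t cost_in_1024ths) {
  /* widen first so the left shift cannot overflow 32 bits */
  return (uint64_t)cost_in_1024ths << (LOG2_PRECISION_ASSUMED - 10);
}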
// Get the symbol entropy for the distribution 'population'.
@@ -601,11 +601,11 @@ static void HistogramBuild(
}

// Copies the histograms and computes its bit_cost.
static const uint16_t kInvalidHistogramSymbol = (uint16_t)(-1);
static const uint32_t kInvalidHistogramSymbol = (uint32_t)(-1);
static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo,
VP8LHistogramSet* const image_histo,
int* const num_used,
uint16_t* const histogram_symbols) {
uint32_t* const histogram_symbols) {
int i, cluster_id;
int num_used_orig = *num_used;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
@@ -667,7 +667,7 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
// 'combine_cost_factor' has to be divided by 100.
static void HistogramCombineEntropyBin(
VP8LHistogramSet* const image_histo, int* num_used,
const uint16_t* const clusters, uint16_t* const cluster_mappings,
const uint32_t* const clusters, uint16_t* const cluster_mappings,
VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
int32_t combine_cost_factor, int low_effort) {
VP8LHistogram** const histograms = image_histo->histograms;
@@ -1070,7 +1070,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
// Note: we assume that out[]->bit_cost_ is already up-to-date.
static void HistogramRemap(const VP8LHistogramSet* const in,
VP8LHistogramSet* const out,
uint16_t* const symbols) {
uint32_t* const symbols) {
int i;
VP8LHistogram** const in_histo = in->histograms;
VP8LHistogram** const out_histo = out->histograms;
@@ -1131,10 +1131,10 @@ static int32_t GetCombineCostFactor(int histo_size, int quality) {
// assign the smallest possible clusters values.
static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
uint16_t* const cluster_mappings,
int num_clusters,
uint32_t num_clusters,
uint16_t* const cluster_mappings_tmp,
uint16_t* const symbols) {
int i, cluster_max;
uint32_t* const symbols) {
uint32_t i, cluster_max;
int do_continue = 1;
// First, assign the lowest cluster to each pixel.
while (do_continue) {
@@ -1158,7 +1158,7 @@ static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
set->max_size * sizeof(*cluster_mappings_tmp));
assert(cluster_mappings[0] == 0);
// Re-map the ids.
for (i = 0; i < set->max_size; ++i) {
for (i = 0; i < (uint32_t)set->max_size; ++i) {
int cluster;
if (symbols[i] == kInvalidHistogramSymbol) continue;
cluster = cluster_mappings[symbols[i]];
@@ -1172,7 +1172,7 @@ static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,

// Make sure all cluster values are used.
cluster_max = 0;
for (i = 0; i < set->max_size; ++i) {
for (i = 0; i < (uint32_t)set->max_size; ++i) {
if (symbols[i] == kInvalidHistogramSymbol) continue;
if (symbols[i] <= cluster_max) continue;
++cluster_max;
@@ -1195,7 +1195,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
int low_effort, int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
VP8LHistogram* const tmp_histo,
uint16_t* const histogram_symbols,
uint32_t* const histogram_symbols,
const WebPPicture* const pic, int percent_range,
int* const percent) {
const int histo_xsize =
@@ -1247,9 +1247,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// Don't combine the histograms using stochastic and greedy heuristics for
// low-effort compression mode.
if (!low_effort || !entropy_combine) {
const float x = quality / 100.f;
// cubic ramp between 1 and MAX_HISTO_GREEDY:
const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
const int threshold_size =
(int)(1 + DivRound(quality * quality * quality * (MAX_HISTO_GREEDY - 1),
100 * 100 * 100));
int do_greedy;
if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
&do_greedy)) {

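The last hunk above replaces the float cubic ramp with integer arithmetic. As a hedged worked check (assuming MAX_HISTO_GREEDY is 100, which is not shown in this diff): at quality 50 the old expression yields (int)(1 + 0.5 * 0.5 * 0.5 * 99) = 13, while the new one yields 1 + DivRound(50 * 50 * 50 * 99, 100 * 100 * 100) = 1 + DivRound(12375000, 1000000) = 1 + 12 = 13, so the stochastic/greedy threshold is preserved up to rounding.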
@@ -109,7 +109,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
int low_effort, int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
VP8LHistogram* const tmp_histo,
uint16_t* const histogram_symbols,
uint32_t* const histogram_symbols,
const WebPPicture* const pic, int percent_range,
int* const percent);

@@ -13,6 +13,7 @@

#include <string.h>

#include "src/dsp/cpu.h"
#include "src/enc/vp8i_enc.h"

//------------------------------------------------------------------------------
@@ -425,6 +426,15 @@ void VP8IteratorStartI4(VP8EncIterator* const it) {
it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
}
}
#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
// Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
// is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
// meaningfully, but due to limitations in MemorySanitizer related to
// modeling of tbl instructions, a warning will be issued. This can be
// removed if MSan is updated to support the instructions. See
// https://issues.webmproject.org/372109644.
memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
#endif
VP8IteratorNzToBytes(it); // import the non-zero context
}

@@ -14,54 +14,62 @@
// Urvang Joshi (urvang@google.com)
// Vincent Rabaud (vrabaud@google.com)

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/vp8li_enc.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"

#define MAX_DIFF_COST (1e30f)
#define HISTO_SIZE (4 * 256)
static const float kSpatialPredictorBias = 15.f;
static const int64_t kSpatialPredictorBias = 15ll << LOG_2_PRECISION_BITS;
static const int kPredLowEffort = 11;
static const uint32_t kMaskAlpha = 0xff000000;
static const int kNumPredModes = 14;

// Mostly used to reduce code size + readability
static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }

//------------------------------------------------------------------------------
// Methods to calculate Entropy (Shannon).

// Compute a bias for prediction entropy using a global heuristic to favor
// values closer to 0. Hence the final negative sign.
static float PredictionCostBias(const uint32_t counts[256], int weight_0,
float exp_val) {
// 'exp_val' has a scaling factor of 1/100.
static int64_t PredictionCostBias(const uint32_t counts[256], uint64_t weight_0,
uint64_t exp_val) {
const int significant_symbols = 256 >> 4;
const float exp_decay_factor = 0.6f;
float bits = (float)weight_0 * counts[0];
const uint64_t exp_decay_factor = 6; // has a scaling factor of 1/10
uint64_t bits = (weight_0 * counts[0]) << LOG_2_PRECISION_BITS;
int i;
exp_val <<= LOG_2_PRECISION_BITS;
for (i = 1; i < significant_symbols; ++i) {
bits += exp_val * (counts[i] + counts[256 - i]);
exp_val *= exp_decay_factor;
bits += DivRound(exp_val * (counts[i] + counts[256 - i]), 100);
exp_val = DivRound(exp_decay_factor * exp_val, 10);
}
return (float)(-0.1 * bits);
return -DivRound((int64_t)bits, 10);
}

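PredictionCostBias is rewritten here from float to 64-bit fixed point: exp_val carries a 1/100 scale (94 stands for 0.94) and the decay factor a 1/10 scale (6 stands for 0.6), with DivRound applying each division with rounding. A small illustrative sketch of one decay step; div_round_sketch below is a local stand-in, not the library's DivRound:

#include <stdint.h>
/* rounding division for non-negative operands, illustration only */
static uint64_t div_round_sketch(uint64_t a, uint64_t b) {
  return (a + b / 2) / b;
}
/* one loop iteration, schematically:
 *   bits   += div_round_sketch(exp_val * pair_count, 100);
 *   exp_val = div_round_sketch(6 * exp_val, 10);   // i.e. multiply by 0.6
 */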
static float PredictionCostSpatialHistogram(
static int64_t PredictionCostSpatialHistogram(
const uint32_t accumulated[HISTO_SIZE], const uint32_t tile[HISTO_SIZE],
int mode, int left_mode, int above_mode) {
int i;
float retval = 0.f;
int64_t retval = 0;
for (i = 0; i < 4; ++i) {
const float kExpValue = 0.94f;
const uint64_t kExpValue = 94;
retval += PredictionCostBias(&tile[i * 256], 1, kExpValue);
// Compute the new cost if 'tile' is added to 'accumulate' but also add the
// cost of the current histogram to guide the spatial predictor selection.
// Basically, favor low entropy, locally and globally.
retval += (float)VP8LCombinedShannonEntropy(&tile[i * 256],
&accumulated[i * 256]) /
(1ll << LOG_2_PRECISION_BITS);
retval += (int64_t)VP8LCombinedShannonEntropy(&tile[i * 256],
&accumulated[i * 256]);
}
// Favor keeping the areas locally similar.
if (mode == left_mode) retval -= kSpatialPredictorBias;
@@ -105,8 +113,6 @@ static WEBP_INLINE void PredictBatch(int mode, int x_start, int y,
}

#if (WEBP_NEAR_LOSSLESS == 1)
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }

static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {
const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));
const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));
@@ -305,20 +311,80 @@ static WEBP_INLINE void GetResidual(
}
}

// Returns best predictor and updates the accumulated histogram.
// Accessors to residual histograms.
static WEBP_INLINE uint32_t* GetHistoArgb(uint32_t* const all_histos,
int subsampling_index, int mode) {
return &all_histos[(subsampling_index * kNumPredModes + mode) * HISTO_SIZE];
}

static WEBP_INLINE const uint32_t* GetHistoArgbConst(
const uint32_t* const all_histos, int subsampling_index, int mode) {
return &all_histos[subsampling_index * kNumPredModes * HISTO_SIZE +
mode * HISTO_SIZE];
}

// Accessors to accumulated residual histogram.
static WEBP_INLINE uint32_t* GetAccumulatedHisto(uint32_t* all_accumulated,
int subsampling_index) {
return &all_accumulated[subsampling_index * HISTO_SIZE];
}

// Find and store the best predictor for a tile at subsampling
// 'subsampling_index'.
static void GetBestPredictorForTile(const uint32_t* const all_argb,
int subsampling_index, int tile_x,
int tile_y, int tiles_per_row,
uint32_t* all_accumulated_argb,
uint32_t** const all_modes,
uint32_t* const all_pred_histos) {
uint32_t* const accumulated_argb =
GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
uint32_t* const modes = all_modes[subsampling_index];
uint32_t* const pred_histos =
&all_pred_histos[subsampling_index * kNumPredModes];
// Prediction modes of the left and above neighbor tiles.
const int left_mode =
(tile_x > 0) ? (modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff
: 0xff;
const int above_mode =
(tile_y > 0) ? (modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff
: 0xff;
int mode;
int64_t best_diff = WEBP_INT64_MAX;
uint32_t best_mode = 0;
const uint32_t* best_histo =
GetHistoArgbConst(all_argb, /*subsampling_index=*/0, best_mode);
for (mode = 0; mode < kNumPredModes; ++mode) {
const uint32_t* const histo_argb =
GetHistoArgbConst(all_argb, subsampling_index, mode);
const int64_t cur_diff = PredictionCostSpatialHistogram(
accumulated_argb, histo_argb, mode, left_mode, above_mode);

if (cur_diff < best_diff) {
best_histo = histo_argb;
best_diff = cur_diff;
best_mode = mode;
}
}
// Update the accumulated histogram.
VP8LAddVectorEq(best_histo, accumulated_argb, HISTO_SIZE);
modes[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (best_mode << 8);
++pred_histos[best_mode];
}

// Computes the residuals for the different predictors.
// If max_quantization > 1, assumes that near lossless processing will be
// applied, quantizing residuals to multiples of quantization levels up to
// max_quantization (the actual quantization level depends on smoothness near
// the given pixel).
static int GetBestPredictorForTile(
int width, int height, int tile_x, int tile_y, int bits,
uint32_t accumulated[HISTO_SIZE], uint32_t* const argb_scratch,
const uint32_t* const argb, int max_quantization, int exact,
int used_subtract_green, const uint32_t* const modes) {
const int kNumPredModes = 14;
const int start_x = tile_x << bits;
const int start_y = tile_y << bits;
const int tile_size = 1 << bits;
static void ComputeResidualsForTile(
int width, int height, int tile_x, int tile_y, int min_bits,
uint32_t update_up_to_index, uint32_t* const all_argb,
uint32_t* const argb_scratch, const uint32_t* const argb,
int max_quantization, int exact, int used_subtract_green) {
const int start_x = tile_x << min_bits;
const int start_y = tile_y << min_bits;
const int tile_size = 1 << min_bits;
const int max_y = GetMin(tile_size, height - start_y);
const int max_x = GetMin(tile_size, width - start_x);
// Whether there exist columns just outside the tile.
@@ -329,34 +395,20 @@ static int GetBestPredictorForTile(
#if (WEBP_NEAR_LOSSLESS == 1)
const int context_width = max_x + have_left + (max_x < width - start_x);
#endif
const int tiles_per_row = VP8LSubSampleSize(width, bits);
// Prediction modes of the left and above neighbor tiles.
const int left_mode = (tile_x > 0) ?
(modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff : 0xff;
const int above_mode = (tile_y > 0) ?
(modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff : 0xff;
// The width of upper_row and current_row is one pixel larger than image width
// to allow the top right pixel to point to the leftmost pixel of the next row
// when at the right edge.
uint32_t* upper_row = argb_scratch;
uint32_t* current_row = upper_row + width + 1;
uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1);
float best_diff = MAX_DIFF_COST;
int best_mode = 0;
int mode;
uint32_t histo_stack_1[HISTO_SIZE];
uint32_t histo_stack_2[HISTO_SIZE];
// Need pointers to be able to swap arrays.
uint32_t* histo_argb = histo_stack_1;
uint32_t* best_histo = histo_stack_2;
uint32_t residuals[1 << MAX_TRANSFORM_BITS];
assert(bits <= MAX_TRANSFORM_BITS);
assert(max_x <= (1 << MAX_TRANSFORM_BITS));

for (mode = 0; mode < kNumPredModes; ++mode) {
float cur_diff;
int relative_y;
memset(histo_argb, 0, sizeof(histo_stack_1));
uint32_t* const histo_argb =
GetHistoArgb(all_argb, /*subsampling_index=*/0, mode);
if (start_y > 0) {
// Read the row above the tile which will become the first upper_row.
// Include a pixel to the left if it exists; include a pixel to the right
@@ -392,21 +444,19 @@ static int GetBestPredictorForTile(
for (relative_x = 0; relative_x < max_x; ++relative_x) {
UpdateHisto(histo_argb, residuals[relative_x]);
}
}
cur_diff = PredictionCostSpatialHistogram(accumulated, histo_argb, mode,
left_mode, above_mode);

if (cur_diff < best_diff) {
uint32_t* tmp = histo_argb;
histo_argb = best_histo;
best_histo = tmp;
best_diff = cur_diff;
best_mode = mode;
if (update_up_to_index > 0) {
uint32_t subsampling_index;
for (subsampling_index = 1; subsampling_index <= update_up_to_index;
++subsampling_index) {
uint32_t* const super_histo =
GetHistoArgb(all_argb, subsampling_index, mode);
for (relative_x = 0; relative_x < max_x; ++relative_x) {
UpdateHisto(super_histo, residuals[relative_x]);
}
}
}
}
}

VP8LAddVectorEq(best_histo, accumulated, HISTO_SIZE);
return best_mode;
}

// Converts pixels of the image to residuals with respect to predictions.
@@ -473,15 +523,16 @@ static void CopyImageWithPrediction(int width, int height, int bits,

// Checks whether 'image' can be subsampled by finding the biggest power of 2
// squares (defined by 'best_bits') of uniform value it is made out of.
static void OptimizeSampling(uint32_t* const image, int full_width,
int full_height, int bits, int* best_bits_out) {
void VP8LOptimizeSampling(uint32_t* const image, int full_width,
int full_height, int bits, int max_bits,
int* best_bits_out) {
int width = VP8LSubSampleSize(full_width, bits);
int height = VP8LSubSampleSize(full_height, bits);
int old_width, x, y, square_size;
int best_bits = bits;
*best_bits_out = bits;
// Check rows first.
while (best_bits < MAX_TRANSFORM_BITS) {
while (best_bits < max_bits) {
const int new_square_size = 1 << (best_bits + 1 - bits);
int is_good = 1;
square_size = 1 << (best_bits - bits);
@@ -536,45 +587,238 @@ static void OptimizeSampling(uint32_t* const image, int full_width,
*best_bits_out = best_bits;
}

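VP8LOptimizeSampling (previously OptimizeSampling) keeps doubling the tile side as long as every 2^k x 2^k block of the mode image is uniform, now capped by the new max_bits argument rather than always by MAX_TRANSFORM_BITS. A rough sketch of the uniformity test it relies on, written as a hypothetical helper rather than libwebp code (odd trailing rows and columns are ignored for brevity):

#include <stdint.h>
static int Is2x2UniformSketch(const uint32_t* image, int width, int height) {
  int x, y;
  for (y = 0; y + 1 < height; y += 2) {
    for (x = 0; x + 1 < width; x += 2) {
      const uint32_t v = image[y * width + x];
      if (image[y * width + x + 1] != v ||
          image[(y + 1) * width + x] != v ||
          image[(y + 1) * width + x + 1] != v) {
        return 0;  /* cannot be subsampled losslessly at the next level */
      }
    }
  }
  return 1;
}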
// Computes the best predictor image.
// Finds the best predictors per tile. Once done, finds the best predictor image
// sampling.
// best_bits is set to 0 in case of error.
// The following requires some glossary:
// - a tile is a square of side 2^min_bits pixels.
// - a super-tile of a tile is a square of side 2^bits pixels with bits in
// [min_bits+1, max_bits].
// - the max-tile of a tile is the square of 2^max_bits pixels containing it.
// If this max-tile crosses the border of an image, it is cropped.
// - tile, super-tiles and max_tile are aligned on powers of 2 in the original
// image.
// - coordinates for tile, super-tile, max-tile are respectively named
// tile_x, super_tile_x, max_tile_x at their bit scale.
// - in the max-tile, a tile has local coordinates (local_tile_x, local_tile_y).
// The tiles are processed in the following zigzag order to complete the
// super-tiles as soon as possible:
// 1 2| 5 6
// 3 4| 7 8
// --------------
// 9 10| 13 14
// 11 12| 15 16
// When computing the residuals for a tile, the histogram of the above
// super-tile is updated. If this super-tile is finished, its histogram is used
// to update the histogram of the next super-tile and so on up to the max-tile.
static void GetBestPredictorsAndSubSampling(
int width, int height, const int min_bits, const int max_bits,
uint32_t* const argb_scratch, const uint32_t* const argb,
int max_quantization, int exact, int used_subtract_green,
const WebPPicture* const pic, int percent_range, int* const percent,
uint32_t** const all_modes, int* best_bits, uint32_t** best_mode) {
const uint32_t tiles_per_row = VP8LSubSampleSize(width, min_bits);
const uint32_t tiles_per_col = VP8LSubSampleSize(height, min_bits);
int64_t best_cost;
uint32_t subsampling_index;
const uint32_t max_subsampling_index = max_bits - min_bits;
// Compute the needed memory size for residual histograms, accumulated
// residual histograms and predictor histograms.
const int num_argb = (max_subsampling_index + 1) * kNumPredModes * HISTO_SIZE;
const int num_accumulated_rgb = (max_subsampling_index + 1) * HISTO_SIZE;
const int num_predictors = (max_subsampling_index + 1) * kNumPredModes;
uint32_t* const raw_data = (uint32_t*)WebPSafeCalloc(
num_argb + num_accumulated_rgb + num_predictors, sizeof(uint32_t));
uint32_t* const all_argb = raw_data;
uint32_t* const all_accumulated_argb = all_argb + num_argb;
uint32_t* const all_pred_histos = all_accumulated_argb + num_accumulated_rgb;
const int max_tile_size = 1 << max_subsampling_index; // in tile size
int percent_start = *percent;
// When using the residuals of a tile for its super-tiles, you can either:
// - use each residual to update the histogram of the super-tile, with a cost
// of 4 * (1<<n)^2 increment operations (4 for the number of channels, and
// (1<<n)^2 for the number of pixels in the tile)
// - use the histogram of the tile to update the histogram of the super-tile,
// with a cost of HISTO_SIZE (1024)
// The first method is therefore faster until n==4. 'update_up_to_index'
// defines the maximum subsampling_index for which the residuals should be
// individually added to the super-tile histogram.
const uint32_t update_up_to_index =
GetMax(GetMin(4, max_bits), min_bits) - min_bits;
// Coordinates in the max-tile in tile units.
uint32_t local_tile_x = 0, local_tile_y = 0;
uint32_t max_tile_x = 0, max_tile_y = 0;
uint32_t tile_x = 0, tile_y = 0;

*best_bits = 0;
*best_mode = NULL;
if (raw_data == NULL) return;

while (tile_y < tiles_per_col) {
ComputeResidualsForTile(width, height, tile_x, tile_y, min_bits,
update_up_to_index, all_argb, argb_scratch, argb,
max_quantization, exact, used_subtract_green);

// Update all the super-tiles that are complete.
subsampling_index = 0;
while (1) {
const uint32_t super_tile_x = tile_x >> subsampling_index;
const uint32_t super_tile_y = tile_y >> subsampling_index;
const uint32_t super_tiles_per_row =
VP8LSubSampleSize(width, min_bits + subsampling_index);
GetBestPredictorForTile(all_argb, subsampling_index, super_tile_x,
super_tile_y, super_tiles_per_row,
all_accumulated_argb, all_modes, all_pred_histos);
if (subsampling_index == max_subsampling_index) break;

// Update the following super-tile histogram if it has not been updated
// yet.
++subsampling_index;
if (subsampling_index > update_up_to_index &&
subsampling_index <= max_subsampling_index) {
VP8LAddVectorEq(
GetHistoArgbConst(all_argb, subsampling_index - 1, /*mode=*/0),
GetHistoArgb(all_argb, subsampling_index, /*mode=*/0),
HISTO_SIZE * kNumPredModes);
}
// Check whether the super-tile is not complete (if the smallest tile
// is not at the end of a line/column or at the beginning of a super-tile
// of size (1 << subsampling_index)).
if (!((tile_x == (tiles_per_row - 1) ||
(local_tile_x + 1) % (1 << subsampling_index) == 0) &&
(tile_y == (tiles_per_col - 1) ||
(local_tile_y + 1) % (1 << subsampling_index) == 0))) {
--subsampling_index;
// subsampling_index now is the index of the last finished super-tile.
break;
}
}
// Reset all the histograms belonging to finished tiles.
memset(all_argb, 0,
HISTO_SIZE * kNumPredModes * (subsampling_index + 1) *
sizeof(*all_argb));

if (subsampling_index == max_subsampling_index) {
// If a new max-tile is started.
if (tile_x == (tiles_per_row - 1)) {
max_tile_x = 0;
++max_tile_y;
} else {
++max_tile_x;
}
local_tile_x = 0;
local_tile_y = 0;
} else {
// Proceed with the Z traversal.
uint32_t coord_x = local_tile_x >> subsampling_index;
uint32_t coord_y = local_tile_y >> subsampling_index;
if (tile_x == (tiles_per_row - 1) && coord_x % 2 == 0) {
++coord_y;
} else {
if (coord_x % 2 == 0) {
++coord_x;
} else {
// Z traversal.
++coord_y;
--coord_x;
}
}
local_tile_x = coord_x << subsampling_index;
local_tile_y = coord_y << subsampling_index;
}
tile_x = max_tile_x * max_tile_size + local_tile_x;
tile_y = max_tile_y * max_tile_size + local_tile_y;

if (tile_x == 0 &&
!WebPReportProgress(
pic, percent_start + percent_range * tile_y / tiles_per_col,
percent)) {
WebPSafeFree(raw_data);
return;
}
}

// Figure out the best sampling.
best_cost = WEBP_INT64_MAX;
for (subsampling_index = 0; subsampling_index <= max_subsampling_index;
++subsampling_index) {
int plane;
const uint32_t* const accumulated =
GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
int64_t cost = VP8LShannonEntropy(
&all_pred_histos[subsampling_index * kNumPredModes], kNumPredModes);
for (plane = 0; plane < 4; ++plane) {
cost += VP8LShannonEntropy(&accumulated[plane * 256], 256);
}
if (cost < best_cost) {
best_cost = cost;
*best_bits = min_bits + subsampling_index;
*best_mode = all_modes[subsampling_index];
}
}

WebPSafeFree(raw_data);

VP8LOptimizeSampling(*best_mode, width, height, *best_bits,
MAX_TRANSFORM_BITS, best_bits);
}

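Two numbers in the function above are worth spelling out. The update_up_to_index crossover follows from 4 * (1 << n)^2 = HISTO_SIZE: at n = 4 the per-residual updates cost 4 * 256 = 1024 increments, exactly the cost of merging one whole histogram, so residuals are added directly only for subsampling levels up to 4. The zigzag (Z-order) walk exists purely so that each super-tile is finished as early as possible, letting its histogram be folded into the next level and then reset.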
// Finds the best predictor for each tile, and converts the image to residuals
|
||||
// with respect to predictions. If near_lossless_quality < 100, applies
|
||||
// near lossless processing, shaving off more bits of residuals for lower
|
||||
// qualities.
|
||||
int VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless_quality,
|
||||
int exact, int used_subtract_green,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent, int* const best_bits) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
||||
int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
|
||||
int low_effort, uint32_t* const argb,
|
||||
uint32_t* const argb_scratch, uint32_t* const image,
|
||||
int near_lossless_quality, int exact,
|
||||
int used_subtract_green, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent,
|
||||
int* const best_bits) {
|
||||
int percent_start = *percent;
|
||||
const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
|
||||
if (low_effort) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, max_bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, max_bits);
|
||||
int i;
|
||||
for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
|
||||
image[i] = ARGB_BLACK | (kPredLowEffort << 8);
|
||||
}
|
||||
*best_bits = bits;
|
||||
*best_bits = max_bits;
|
||||
} else {
|
||||
int tile_y;
|
||||
uint32_t histo[HISTO_SIZE] = { 0 };
|
||||
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
|
||||
int tile_x;
|
||||
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
|
||||
const int pred = GetBestPredictorForTile(
|
||||
width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
|
||||
max_quantization, exact, used_subtract_green, image);
|
||||
image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
|
||||
}
|
||||
|
||||
if (!WebPReportProgress(
|
||||
pic, percent_start + percent_range * tile_y / tiles_per_col,
|
||||
percent)) {
|
||||
return 0;
|
||||
}
|
||||
// Allocate data to try all samplings from min_bits to max_bits.
|
||||
int bits;
|
||||
uint32_t sum_num_pixels = 0u;
|
||||
uint32_t *modes_raw, *best_mode;
|
||||
uint32_t* modes[MAX_TRANSFORM_BITS + 1];
|
||||
uint32_t num_pixels[MAX_TRANSFORM_BITS + 1];
|
||||
for (bits = min_bits; bits <= max_bits; ++bits) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
||||
num_pixels[bits] = tiles_per_row * tiles_per_col;
|
||||
sum_num_pixels += num_pixels[bits];
|
||||
}
|
||||
OptimizeSampling(image, width, height, bits, best_bits);
|
||||
modes_raw = (uint32_t*)WebPSafeMalloc(sum_num_pixels, sizeof(*modes_raw));
|
||||
if (modes_raw == NULL) return 0;
|
||||
// Have modes point to the right global memory modes_raw.
|
||||
modes[min_bits] = modes_raw;
|
||||
for (bits = min_bits + 1; bits <= max_bits; ++bits) {
|
||||
modes[bits] = modes[bits - 1] + num_pixels[bits - 1];
|
||||
}
|
||||
// Find the best sampling.
|
||||
GetBestPredictorsAndSubSampling(
|
||||
width, height, min_bits, max_bits, argb_scratch, argb, max_quantization,
|
||||
exact, used_subtract_green, pic, percent_range, percent,
|
||||
&modes[min_bits], best_bits, &best_mode);
|
||||
if (*best_bits == 0) {
|
||||
WebPSafeFree(modes_raw);
|
||||
return 0;
|
||||
}
|
||||
// Keep the best predictor image.
|
||||
memcpy(image, best_mode,
|
||||
VP8LSubSampleSize(width, *best_bits) *
|
||||
VP8LSubSampleSize(height, *best_bits) * sizeof(*image));
|
||||
WebPSafeFree(modes_raw);
|
||||
}
|
||||
|
||||
CopyImageWithPrediction(width, height, *best_bits, image, argb_scratch, argb,
|
||||
@ -607,35 +851,36 @@ static WEBP_INLINE uint32_t MultipliersToColorCode(
|
||||
m->green_to_red_;
|
||||
}
|
||||
|
||||
static float PredictionCostCrossColor(const uint32_t accumulated[256],
|
||||
const uint32_t counts[256]) {
|
||||
static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],
|
||||
const uint32_t counts[256]) {
|
||||
// Favor low entropy, locally and globally.
|
||||
// Favor small absolute values for PredictionCostSpatial
|
||||
static const float kExpValue = 2.4f;
|
||||
return (float)VP8LCombinedShannonEntropy(counts, accumulated) /
|
||||
(1ll << LOG_2_PRECISION_BITS) +
|
||||
static const uint64_t kExpValue = 240;
|
||||
return (int64_t)VP8LCombinedShannonEntropy(counts, accumulated) +
|
||||
PredictionCostBias(counts, 3, kExpValue);
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
static int64_t GetPredictionCostCrossColorRed(
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
||||
const uint32_t accumulated_red_histo[256]) {
|
||||
uint32_t histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
int64_t cur_diff;
|
||||
|
||||
VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
|
||||
green_to_red, histo);
|
||||
|
||||
cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
|
||||
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if ((uint8_t)green_to_red == prev_y.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if (green_to_red == 0) {
|
||||
cur_diff -= 3;
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
return cur_diff;
|
||||
}
|
||||
@ -648,9 +893,9 @@ static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
|
||||
const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
|
||||
int green_to_red_best = 0;
|
||||
int iter, offset;
|
||||
float best_diff = GetPredictionCostCrossColorRed(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y,
|
||||
green_to_red_best, accumulated_red_histo);
|
||||
int64_t best_diff = GetPredictionCostCrossColorRed(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_red_best,
|
||||
accumulated_red_histo);
|
||||
for (iter = 0; iter < kMaxIters; ++iter) {
|
||||
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
|
||||
// one in color computation. Having initial delta here as 1 is sufficient
|
||||
@ -659,7 +904,7 @@ static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
|
||||
// Try a negative and a positive delta from the best known value.
|
||||
for (offset = -delta; offset <= delta; offset += 2 * delta) {
|
||||
const int green_to_red_cur = offset + green_to_red_best;
|
||||
-        const float cur_diff = GetPredictionCostCrossColorRed(
+        const int64_t cur_diff = GetPredictionCostCrossColorRed(
             argb, stride, tile_width, tile_height, prev_x, prev_y,
             green_to_red_cur, accumulated_red_histo);
         if (cur_diff < best_diff) {
@@ -671,34 +916,38 @@ static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
   best_tx->green_to_red_ = (green_to_red_best & 0xff);
 }

-static float GetPredictionCostCrossColorBlue(
+static int64_t GetPredictionCostCrossColorBlue(
     const uint32_t* argb, int stride, int tile_width, int tile_height,
     VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_blue,
     int red_to_blue, const uint32_t accumulated_blue_histo[256]) {
   uint32_t histo[256] = { 0 };
-  float cur_diff;
+  int64_t cur_diff;

   VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
                                  green_to_blue, red_to_blue, histo);

   cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
   if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
-    cur_diff -= 3;  // favor keeping the areas locally similar
+    // favor keeping the areas locally similar
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
-    cur_diff -= 3;  // favor keeping the areas locally similar
+    // favor keeping the areas locally similar
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
-    cur_diff -= 3;  // favor keeping the areas locally similar
+    // favor keeping the areas locally similar
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
-    cur_diff -= 3;  // favor keeping the areas locally similar
+    // favor keeping the areas locally similar
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   if (green_to_blue == 0) {
-    cur_diff -= 3;
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   if (red_to_blue == 0) {
-    cur_diff -= 3;
+    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
   }
   return cur_diff;
 }
@@ -720,9 +969,9 @@ static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
   int red_to_blue_best = 0;
   int iter;
   // Initial value at origin:
-  float best_diff = GetPredictionCostCrossColorBlue(
-      argb, stride, tile_width, tile_height, prev_x, prev_y,
-      green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
+  int64_t best_diff = GetPredictionCostCrossColorBlue(
+      argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_blue_best,
+      red_to_blue_best, accumulated_blue_histo);
   for (iter = 0; iter < iters; ++iter) {
     const int delta = delta_lut[iter];
     int axis;
@@ -730,7 +979,7 @@ static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
       const int green_to_blue_cur =
           offset[axis][0] * delta + green_to_blue_best;
       const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
-      const float cur_diff = GetPredictionCostCrossColorBlue(
+      const int64_t cur_diff = GetPredictionCostCrossColorBlue(
           argb, stride, tile_width, tile_height, prev_x, prev_y,
           green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
       if (cur_diff < best_diff) {
@@ -856,6 +1105,7 @@ int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
       return 0;
     }
   }
-  OptimizeSampling(image, width, height, bits, best_bits);
+  VP8LOptimizeSampling(image, width, height, bits, MAX_TRANSFORM_BITS,
+                       best_bits);
   return 1;
 }
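Note on the change above: the cross-color prediction costs switch from float to int64_t, so the small bonus that favors reusing the neighbouring tiles' multipliers is now expressed in the encoder's fixed-point scale (a cost of N bits becomes N << LOG_2_PRECISION_BITS). A minimal sketch of that convention; the value 23 below is an assumed stand-in for LOG_2_PRECISION_BITS, not taken from the header:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG_2_PRECISION_BITS 23  /* assumed value, for illustration only */

    int main(void) {
      /* A 3-bit bonus expressed in the fixed-point cost domain. */
      const int64_t bonus = 3ll << LOG_2_PRECISION_BITS;
      /* Convert back to bits for inspection: prints 3.00. */
      printf("%.2f bits\n",
             (double)bonus / (double)(1ll << LOG_2_PRECISION_BITS));
      return 0;
    }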
@@ -16,6 +16,7 @@

 #include <string.h>  // for memcpy()
 #include "src/dec/common_dec.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
 #include "src/utils/bit_writer_utils.h"
 #include "src/utils/thread_utils.h"
@@ -31,7 +32,7 @@ extern "C" {

 // version numbers
 #define ENC_MAJ_VERSION 1
-#define ENC_MIN_VERSION 4
+#define ENC_MIN_VERSION 5
 #define ENC_REV_VERSION 0

 enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
@@ -233,7 +234,11 @@ typedef struct {
   VP8BitWriter* bw_;                // current bit-writer
   uint8_t* preds_;                  // intra mode predictors (4x4 blocks)
   uint32_t* nz_;                    // non-zero pattern
+#if WEBP_AARCH64 && BPS == 32
+  uint8_t i4_boundary_[40];         // 32+8 boundary samples needed by intra4x4
+#else
   uint8_t i4_boundary_[37];         // 32+5 boundary samples needed by intra4x4
+#endif
   uint8_t* i4_top_;                 // pointer to the current top boundary sample
   int i4_;                          // current intra4x4 mode being tested
   int top_nz_[9];                   // top-non-zero context.
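With ENC_MIN_VERSION bumped to 5, the encoder reports itself as 1.5.0. WebPGetEncoderVersion() documents its return value as the three version fields packed into one integer, 8 bits each, so the bump is visible to callers as in this small check:

    #include <stdio.h>
    #include <webp/encode.h>

    int main(void) {
      /* Packed as (major << 16) | (minor << 8) | revision, e.g. 0x010500. */
      const int v = WebPGetEncoderVersion();
      printf("libwebp encoder %d.%d.%d\n",
             (v >> 16) & 0xff, (v >> 8) & 0xff, v & 0xff);
      return 0;
    }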
@@ -31,6 +31,9 @@
 // Maximum number of histogram images (sub-blocks).
 #define MAX_HUFF_IMAGE_SIZE 2600
 #define MAX_HUFFMAN_BITS (MIN_HUFFMAN_BITS + (1 << NUM_HUFFMAN_BITS) - 1)
+// Empirical value for which it becomes too computationally expensive to
+// compute the best predictor image.
+#define MAX_PREDICTOR_IMAGE_SIZE (1 << 14)

 // -----------------------------------------------------------------------------
 // Palette
@@ -232,17 +235,33 @@ static int AnalyzeEntropy(const uint32_t* argb,
     }
   }

+// Clamp histogram and transform bits.
+static int ClampBits(int width, int height, int bits, int min_bits,
+                     int max_bits, int image_size_max) {
+  int image_size;
+  bits = (bits < min_bits) ? min_bits : (bits > max_bits) ? max_bits : bits;
+  image_size = VP8LSubSampleSize(width, bits) * VP8LSubSampleSize(height, bits);
+  while (bits < max_bits && image_size > image_size_max) {
+    ++bits;
+    image_size =
+        VP8LSubSampleSize(width, bits) * VP8LSubSampleSize(height, bits);
+  }
+  // In case the bits reduce the image too much, choose the smallest value
+  // setting the histogram image size to 1.
+  while (bits > min_bits && image_size == 1) {
+    image_size = VP8LSubSampleSize(width, bits - 1) *
+                 VP8LSubSampleSize(height, bits - 1);
+    if (image_size != 1) break;
+    --bits;
+  }
+  return bits;
+}
+
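The new ClampBits() helper first clamps the requested tile bits to [min_bits, max_bits] and then raises them until the sub-sampled tile image fits under image_size_max. An illustrative, self-contained sketch of the main loop (the second safeguard loop is omitted; the bounds 2/9 and the limit 2600 are assumed here to match the usual MIN_HUFFMAN_BITS/MAX_HUFFMAN_BITS/MAX_HUFF_IMAGE_SIZE values, treat them as assumptions):

    #include <stdint.h>
    #include <stdio.h>

    /* Rounding-up division by 2^sampling_bits, as in the lossless codec. */
    static uint32_t SubSampleSize(uint32_t size, uint32_t sampling_bits) {
      return (size + (1u << sampling_bits) - 1) >> sampling_bits;
    }

    /* Illustrative re-implementation of the clamping logic shown above. */
    static int ClampBitsSketch(int width, int height, int bits, int min_bits,
                               int max_bits, int image_size_max) {
      uint32_t image_size;
      bits = (bits < min_bits) ? min_bits : (bits > max_bits) ? max_bits : bits;
      image_size = SubSampleSize(width, bits) * SubSampleSize(height, bits);
      while (bits < max_bits && image_size > (uint32_t)image_size_max) {
        ++bits;
        image_size = SubSampleSize(width, bits) * SubSampleSize(height, bits);
      }
      return bits;
    }

    int main(void) {
      /* For a 4000x3000 image and histogram bits starting at 3, the tile
         image would be 500 * 375 = 187500 sub-blocks; the loop raises bits
         until the count fits under 2600, which happens at bits == 7. */
      printf("%d\n", ClampBitsSketch(4000, 3000, 3, 2, 9, 2600));
      return 0;
    }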
 static int GetHistoBits(int method, int use_palette, int width, int height) {
   // Make tile size a function of encoding method (Range: 0 to 6).
-  int histo_bits = (use_palette ? 9 : 7) - method;
-  while (1) {
-    const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
-                                VP8LSubSampleSize(height, histo_bits);
-    if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
-    ++histo_bits;
-  }
-  return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
-         (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
+  const int histo_bits = (use_palette ? 9 : 7) - method;
+  return ClampBits(width, height, histo_bits, MIN_HUFFMAN_BITS,
+                   MAX_HUFFMAN_BITS, MAX_HUFF_IMAGE_SIZE);
 }

 static int GetTransformBits(int method, int histo_bits) {
@@ -664,11 +683,12 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
   VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits);
 }

-static int StoreImageToBitMask(
-    VP8LBitWriter* const bw, int width, int histo_bits,
-    const VP8LBackwardRefs* const refs,
-    const uint16_t* histogram_symbols,
-    const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) {
+static int StoreImageToBitMask(VP8LBitWriter* const bw, int width,
+                               int histo_bits,
+                               const VP8LBackwardRefs* const refs,
+                               const uint32_t* histogram_symbols,
+                               const HuffmanTreeCode* const huffman_codes,
+                               const WebPPicture* const pic) {
   const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
   const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
   // x and y trace the position in the image.
@@ -676,7 +696,7 @@ static int StoreImageToBitMask(
   int y = 0;
   int tile_x = x & tile_mask;
   int tile_y = y & tile_mask;
-  int histogram_ix = histogram_symbols[0];
+  int histogram_ix = (histogram_symbols[0] >> 8) & 0xffff;
   const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix;
   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
   while (VP8LRefsCursorOk(&c)) {
@@ -684,8 +704,10 @@ static int StoreImageToBitMask(
     if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) {
       tile_x = x & tile_mask;
       tile_y = y & tile_mask;
-      histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize +
-                                       (x >> histo_bits)];
+      histogram_ix = (histogram_symbols[(y >> histo_bits) * histo_xsize +
+                                        (x >> histo_bits)] >>
+                      8) &
+                     0xffff;
       codes = huffman_codes + 5 * histogram_ix;
     }
     if (PixOrCopyIsLiteral(v)) {
@@ -741,7 +763,7 @@ static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
   VP8LBackwardRefs* refs;
   HuffmanTreeToken* tokens = NULL;
   HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}};
-  const uint16_t histogram_symbols[1] = {0};  // only one tree, one symbol
+  const uint32_t histogram_symbols[1] = {0};  // only one tree, one symbol
   int cache_bits = 0;
   VP8LHistogramSet* histogram_image = NULL;
   HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
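Context for the lookups above: histogram_symbols entries are now 32-bit values with the meta-Huffman index kept in bits 8..23, hence the new (... >> 8) & 0xffff reads. A tiny sketch of that packing (the helper names are made up here for illustration):

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative pack/unpack of a meta-Huffman index into the byte layout
       used by the lookups above: the index lives in bits 8..23. */
    static uint32_t PackHistogramIndex(uint32_t symbol_index) {
      assert(symbol_index <= 0xffffu);
      return symbol_index << 8;
    }

    static int UnpackHistogramIndex(uint32_t packed) {
      return (int)((packed >> 8) & 0xffff);
    }

    int main(void) {
      const uint32_t packed = PackHistogramIndex(37);
      assert(UnpackHistogramIndex(packed) == 37);
      return 0;
    }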
@@ -824,32 +846,32 @@ static int EncodeImageInternal(
     VP8LBitWriter* const bw, const uint32_t* const argb,
     VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width,
     int height, int quality, int low_effort, const CrunchConfig* const config,
-    int* cache_bits, int histogram_bits, size_t init_byte_position,
+    int* cache_bits, int histogram_bits_in, size_t init_byte_position,
     int* const hdr_size, int* const data_size, const WebPPicture* const pic,
     int percent_range, int* const percent) {
   const uint32_t histogram_image_xysize =
-      VP8LSubSampleSize(width, histogram_bits) *
-      VP8LSubSampleSize(height, histogram_bits);
+      VP8LSubSampleSize(width, histogram_bits_in) *
+      VP8LSubSampleSize(height, histogram_bits_in);
   int remaining_percent = percent_range;
   int percent_start = *percent;
   VP8LHistogramSet* histogram_image = NULL;
   VP8LHistogram* tmp_histo = NULL;
-  int histogram_image_size = 0;
+  uint32_t i, histogram_image_size = 0;
   size_t bit_array_size = 0;
   HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
       3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
   HuffmanTreeToken* tokens = NULL;
   HuffmanTreeCode* huffman_codes = NULL;
-  uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(
-      histogram_image_xysize, sizeof(*histogram_symbols));
+  uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(
+      histogram_image_xysize, sizeof(*histogram_argb));
   int sub_configs_idx;
   int cache_bits_init, write_histogram_image;
   VP8LBitWriter bw_init = *bw, bw_best;
   int hdr_size_tmp;
   VP8LHashChain hash_chain_histogram;  // histogram image hash chain
   size_t bw_size_best = ~(size_t)0;
-  assert(histogram_bits >= MIN_HUFFMAN_BITS);
-  assert(histogram_bits <= MAX_HUFFMAN_BITS);
+  assert(histogram_bits_in >= MIN_HUFFMAN_BITS);
+  assert(histogram_bits_in <= MAX_HUFFMAN_BITS);
   assert(hdr_size != NULL);
   assert(data_size != NULL);

@@ -860,7 +882,7 @@ static int EncodeImageInternal(
   }

   // Make sure we can allocate the different objects.
-  if (huff_tree == NULL || histogram_symbols == NULL ||
+  if (huff_tree == NULL || histogram_argb == NULL ||
       !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) {
     WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
     goto Error;
@@ -902,6 +924,7 @@ static int EncodeImageInternal(

   for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) {
     const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0;
+    int histogram_bits = histogram_bits_in;
     // Speed-up: no need to study the no-cache case if it was already studied
     // in i_cache == 0.
     if (i_cache == 1 && cache_bits_best == 0) break;
@@ -923,7 +946,7 @@ static int EncodeImageInternal(
     if (!VP8LGetHistoImageSymbols(
             width, height, &refs_array[i_cache], quality, low_effort,
             histogram_bits, cache_bits_tmp, histogram_image, tmp_histo,
-            histogram_symbols, pic, i_percent_range, percent)) {
+            histogram_argb, pic, i_percent_range, percent)) {
       goto Error;
     }
     // Create Huffman bit lengths and codes for each histogram image.
@@ -956,26 +979,19 @@ static int EncodeImageInternal(
     }

     // Huffman image + meta huffman.
+    histogram_image_size = 0;
+    for (i = 0; i < histogram_image_xysize; ++i) {
+      if (histogram_argb[i] >= histogram_image_size) {
+        histogram_image_size = histogram_argb[i] + 1;
+      }
+      histogram_argb[i] <<= 8;
+    }
+
     write_histogram_image = (histogram_image_size > 1);
     VP8LPutBits(bw, write_histogram_image, 1);
     if (write_histogram_image) {
-      uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(
-          histogram_image_xysize, sizeof(*histogram_argb));
-      int max_index = 0;
-      uint32_t i;
-      if (histogram_argb == NULL) {
-        WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-        goto Error;
-      }
-      for (i = 0; i < histogram_image_xysize; ++i) {
-        const int symbol_index = histogram_symbols[i] & 0xffff;
-        histogram_argb[i] = (symbol_index << 8);
-        if (symbol_index >= max_index) {
-          max_index = symbol_index + 1;
-        }
-      }
-      histogram_image_size = max_index;
-
+      VP8LOptimizeSampling(histogram_argb, width, height, histogram_bits_in,
+                           MAX_HUFFMAN_BITS, &histogram_bits);
       VP8LPutBits(bw, histogram_bits - 2, 3);
       i_percent_range = i_remaining_percent / 2;
       i_remaining_percent -= i_percent_range;
@@ -984,15 +1000,12 @@ static int EncodeImageInternal(
               VP8LSubSampleSize(width, histogram_bits),
               VP8LSubSampleSize(height, histogram_bits), quality, low_effort,
               pic, i_percent_range, percent)) {
-        WebPSafeFree(histogram_argb);
         goto Error;
       }
-      WebPSafeFree(histogram_argb);
     }

     // Store Huffman codes.
     {
-      int i;
       int max_tokens = 0;
       // Find maximum number of symbols for the huffman tree-set.
       for (i = 0; i < 5 * histogram_image_size; ++i) {
@@ -1015,7 +1028,7 @@ static int EncodeImageInternal(
     // Store actual literals.
     hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
     if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache],
-                             histogram_symbols, huffman_codes, pic)) {
+                             histogram_argb, huffman_codes, pic)) {
       goto Error;
     }
     // Keep track of the smallest image so far.
@@ -1052,7 +1065,7 @@ static int EncodeImageInternal(
     WebPSafeFree(huffman_codes->codes);
     WebPSafeFree(huffman_codes);
   }
-  WebPSafeFree(histogram_symbols);
+  WebPSafeFree(histogram_argb);
   VP8LBitWriterWipeOut(&bw_best);
   return (pic->error_code == VP8_ENC_OK);
 }
@@ -1071,14 +1084,19 @@ static int ApplyPredictFilter(VP8LEncoder* const enc, int width, int height,
                               int quality, int low_effort,
                               int used_subtract_green, VP8LBitWriter* const bw,
                               int percent_range, int* const percent) {
-  const int min_bits = enc->predictor_transform_bits_;
   int best_bits;
   // we disable near-lossless quantization if palette is used.
   const int near_lossless_strength =
       enc->use_palette_ ? 100 : enc->config_->near_lossless;
+  const int max_bits = ClampBits(width, height, enc->predictor_transform_bits_,
+                                 MIN_TRANSFORM_BITS, MAX_TRANSFORM_BITS,
+                                 MAX_PREDICTOR_IMAGE_SIZE);
+  const int min_bits = ClampBits(
+      width, height,
+      max_bits - 2 * (enc->config_->method > 4 ? enc->config_->method - 4 : 0),
+      MIN_TRANSFORM_BITS, MAX_TRANSFORM_BITS, MAX_PREDICTOR_IMAGE_SIZE);

-  if (!VP8LResidualImage(width, height, min_bits, low_effort, enc->argb_,
-                         enc->argb_scratch_, enc->transform_data_,
+  if (!VP8LResidualImage(width, height, min_bits, max_bits, low_effort,
+                         enc->argb_, enc->argb_scratch_, enc->transform_data_,
                          near_lossless_strength, enc->config_->exact,
                          used_subtract_green, enc->pic_, percent_range / 2,
                          percent, &best_bits)) {
@@ -1201,14 +1219,10 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width,
       enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) /
                               sizeof(uint32_t)
                         : 0;
-  const int min_transform_bits =
-      (enc->predictor_transform_bits_ < enc->cross_color_transform_bits_)
-          ? enc->predictor_transform_bits_
-          : enc->cross_color_transform_bits_;
   const uint64_t transform_data_size =
       (enc->use_predict_ || enc->use_cross_color_)
-          ? (uint64_t)VP8LSubSampleSize(width, min_transform_bits) *
-                VP8LSubSampleSize(height, min_transform_bits)
+          ? (uint64_t)VP8LSubSampleSize(width, MIN_TRANSFORM_BITS) *
+                VP8LSubSampleSize(height, MIN_TRANSFORM_BITS)
           : 0;
   const uint64_t max_alignment_in_words =
       (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t);
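In the new ApplyPredictFilter() above, max_bits is the configured predictor transform bits clamped by ClampBits(), and min_bits widens the searched range by two bits for every method level above 4. A small illustrative calculation of the range that would be searched, with an assumed max_bits of 5 and before the final clamping to MIN_TRANSFORM_BITS:

    #include <stdio.h>

    int main(void) {
      const int max_bits = 5;  /* assumed value for illustration only */
      int method;
      for (method = 4; method <= 6; ++method) {
        const int extra = (method > 4) ? method - 4 : 0;
        const int min_bits_unclamped = max_bits - 2 * extra;
        printf("method %d: searches transform bits %d..%d (before clamping)\n",
               method, min_bits_unclamped, max_bits);
      }
      return 0;
    }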
@@ -105,10 +105,10 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,

 // pic and percent are for progress.
 // Returns false in case of error (stored in pic->error_code).
-int VP8LResidualImage(int width, int height, int bits, int low_effort,
-                      uint32_t* const argb, uint32_t* const argb_scratch,
-                      uint32_t* const image, int near_lossless_quality,
-                      int exact, int used_subtract_green,
+int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
+                      int low_effort, uint32_t* const argb,
+                      uint32_t* const argb_scratch, uint32_t* const image,
+                      int near_lossless, int exact, int used_subtract_green,
                       const WebPPicture* const pic, int percent_range,
                       int* const percent, int* const best_bits);

@@ -117,6 +117,10 @@ int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                             const WebPPicture* const pic, int percent_range,
                             int* const percent, int* const best_bits);

+void VP8LOptimizeSampling(uint32_t* const image, int full_width,
+                          int full_height, int bits, int max_bits,
+                          int* best_bits_out);
+
 //------------------------------------------------------------------------------

 #ifdef __cplusplus
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,4,0
- PRODUCTVERSION 1,0,4,0
+ FILEVERSION 1,0,5,0
+ PRODUCTVERSION 1,0,5,0
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
     BEGIN
       VALUE "CompanyName", "Google, Inc."
       VALUE "FileDescription", "libwebp DLL"
-      VALUE "FileVersion", "1.4.0"
+      VALUE "FileVersion", "1.5.0"
       VALUE "InternalName", "libwebp.dll"
       VALUE "LegalCopyright", "Copyright (C) 2024"
       VALUE "OriginalFilename", "libwebp.dll"
       VALUE "ProductName", "WebP Image Codec"
-      VALUE "ProductVersion", "1.4.0"
+      VALUE "ProductVersion", "1.5.0"
     END
   END
   BLOCK "VarFileInfo"
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,4,0
- PRODUCTVERSION 1,0,4,0
+ FILEVERSION 1,0,5,0
+ PRODUCTVERSION 1,0,5,0
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
     BEGIN
       VALUE "CompanyName", "Google, Inc."
       VALUE "FileDescription", "libwebpdecoder DLL"
-      VALUE "FileVersion", "1.4.0"
+      VALUE "FileVersion", "1.5.0"
       VALUE "InternalName", "libwebpdecoder.dll"
       VALUE "LegalCopyright", "Copyright (C) 2024"
       VALUE "OriginalFilename", "libwebpdecoder.dll"
       VALUE "ProductName", "WebP Image Decoder"
-      VALUE "ProductVersion", "1.4.0"
+      VALUE "ProductVersion", "1.5.0"
     END
   END
   BLOCK "VarFileInfo"
@@ -17,6 +17,6 @@ noinst_HEADERS =
 noinst_HEADERS += ../webp/format_constants.h

 libwebpmux_la_LIBADD = ../libwebp.la
-libwebpmux_la_LDFLAGS = -no-undefined -version-info 4:0:1 -lm
+libwebpmux_la_LDFLAGS = -no-undefined -version-info 4:1:1 -lm
 libwebpmuxincludedir = $(includedir)/webp
 pkgconfig_DATA = libwebpmux.pc
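For readers tracking binary compatibility: libtool's -version-info takes current:revision:age, and on ELF platforms the installed library is named lib<name>.so.(current-age).(age).(revision). Assuming that usual mapping:

    old: -version-info 4:0:1  ->  libwebpmux.so.3.1.0
    new: -version-info 4:1:1  ->  libwebpmux.so.3.1.1

Only the revision moves, the convention for a release that changes the implementation without changing the libwebpmux interface.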
@@ -191,7 +191,8 @@ int WebPAnimEncoderOptionsInitInternal(WebPAnimEncoderOptions* enc_options,
   return 1;
 }

-// This starting value is more fit to WebPCleanupTransparentAreaLossless().
+// This value is used to match a later call to WebPReplaceTransparentPixels(),
+// making it a no-op for lossless (see WebPEncode()).
 #define TRANSPARENT_COLOR 0x00000000

 static void ClearRectangle(WebPPicture* const picture,
@@ -6,8 +6,8 @@
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,0,4,0
- PRODUCTVERSION 1,0,4,0
+ FILEVERSION 1,0,5,0
+ PRODUCTVERSION 1,0,5,0
  FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -24,12 +24,12 @@ BEGIN
     BEGIN
       VALUE "CompanyName", "Google, Inc."
       VALUE "FileDescription", "libwebpmux DLL"
-      VALUE "FileVersion", "1.4.0"
+      VALUE "FileVersion", "1.5.0"
       VALUE "InternalName", "libwebpmux.dll"
       VALUE "LegalCopyright", "Copyright (C) 2024"
       VALUE "OriginalFilename", "libwebpmux.dll"
       VALUE "ProductName", "WebP Image Muxer"
-      VALUE "ProductVersion", "1.4.0"
+      VALUE "ProductVersion", "1.5.0"
     END
   END
   BLOCK "VarFileInfo"
@@ -28,7 +28,7 @@ extern "C" {
 // Defines and constants.

 #define MUX_MAJ_VERSION 1
-#define MUX_MIN_VERSION 4
+#define MUX_MIN_VERSION 5
 #define MUX_REV_VERSION 0

 // Chunk object.
Some files were not shown because too many files have changed in this diff.